Model

library(DiagrammeR) 
# Nodes
 #node [shape = box]
 # S [label = 'Matched\n(S=1)',fontsize=7]
 # C [label = 'Not censored\n(C=0)',fontsize=7]
gr1<-
DiagrammeR::grViz("
digraph causal {

# Nodes
  node [shape = plaintext]
  a [label = 'Observed\nConfounders\n(Z)',fontsize=10]
  b [label = 'Unobserved\nConfounders\n(U)',fontsize=10]
  c [label = 'Early\nDrop-out\n(Y)',fontsize=10]
  d [label = 'Residential\nPrograms\n(X)',fontsize=10]

# Edges
  edge [color = black,
        arrowhead = vee]
  rankdir = TB;
  
  b -> c 
  b -> a 
  a -> c  

  d -> c [minlen=1]
  d -> a [minlen=1]
  
 # a -> S #[minlen=1]
 # Z -> S #[minlen=1]
  
#  a -> C #[minlen=3]
#  Z -> C #[minlen=3]
  { rank = same; b; a; c }
# { rank = same; S; C }
  { rankdir = LR; a; d }

# Graph
  graph [overlap = true]
}")
gr1

Figure 1. Directed Acyclic Graph

#  {rank=same ; A -> B -> C -> D};
#       {rank=same ;           F -> E[dir=back]};
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3733703/
#Cohort matching on a variable associated with both outcome and censoring
#Cohort matching on a confounder. We let A denote an exposure, Y denote an outcome, and C denote a confounder and matching variable. The variable S indicates whether an individual in the source population is selected for the matched study (1: selected, 0: not selected). See Section 2-7 for details.
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7064555/
gr2<-
DiagrammeR::grViz("
digraph causal {

  # Nodes
  node [shape = plaintext]
  a [label = 'Residential\nPrograms\n(X)',fontsize=10]
  b [label = 'Unobserved\nConfounders\n(U)',fontsize=10]
  c [label = 'Early\nDrop-out\n(Y)',fontsize=10]
  d [label = 'Observed\nConfounders\n(Z)',fontsize=10]

  # Edges
  edge [color = black,
        arrowhead = vee]
  rankdir = TB
  a -> c [minlen=3]
  d -> a [minlen=3]
  d -> c [minlen=9]
  
  b -> a [minlen=1]
  b -> c
  
{ rank = same; c; d }
#{ rank = same; b; d }
  rankdir = TB
{ rank = same; d; c } #Ver si lo saco, creo que da problemas
  
  # Graph
  graph [overlap = true]
}")#LR

Balance

We selected the cases that had completed treatments, leaving 101,910 observations. Then, we selected those cases that had either early drop-outs and late drop-outs (n=55,908). Must take note that we did not include those that did not finished their treatment (n= 9), but we included those that had more than one finished treatment previous to the selected (n= 11,834).


We selected the following variables of interest:

  • ā€œStarting Substanceā€ (sus_ini_mvv)
  • ā€œMarital Statusā€ (estado_conyugal_2)
  • ā€œEducational Attainmentā€ (escolaridad_rec)
  • ā€œOccupational Statusā€ (condicion_ocupacional_corr)
  • ā€œAge of Onset of Drug Useā€ (edad_ini_cons)
  • ā€œFrequency of use of primary drugā€ (freq_cons_sus_prin)
  • ā€œMotive of Admission to Treatmentā€ (origen_ingreso_mod)
  • ā€œPsychiatric comorbidityā€ (dg_cie_10_rec)
  • ā€œChilean Region of the Centerā€ (nombre_region)
  • ā€œType of Center (Public)ā€ (tipo_centro_pub)
  • ā€œEarly Dropout (Against Staff Advice)ā€ (abandono_temprano_rec) (Y)
  • ā€œResidential Type of Planā€ (tipo_de_plan_res) (Z)
  • ā€œEvaluation of the Therapeutic Process (Min. Achievement)ā€ (*) (evaluacindelprocesoteraputico_min)
  • ā€œNo.Ā of Previous Treatmentsā€ (prev_treat)
  • ā€œSexā€ (sexo_2)
  • ā€œAge at Admission to Treatmentā€ (edad_al_ing)
  • ā€œDate of Admission to Treatmentā€ (fech_ing_num)


We generated a correlation plot to get an overview of heterogeneous correlations between the different variables.


require(polycor)
## Loading required package: polycor
#Corresponde a la apreciación clínica que hace el equipo o profesional tratante, la persona en tratamiento y su familia, del nivel alcanzado de logro de los objetivos terapéuticos planteados al inicio del proceso y descritos en el plan de tratamiento personalizado. Los criterios incluyen la evaluación del estado clínico y psicosocial al momento del egreso y una apreciación pronostica del equipo tratante.

match.on_tot <- c("row", "hash_key","sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","condicion_ocupacional_corr","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","tipo_centro_pub","abandono_temprano_rec","tipo_de_plan_res","evaluacindelprocesoteraputico_min","prev_treat","sexo_2","edad_al_ing","fech_ing_num")

#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(evaluacindelprocesoteraputico)

CONS_C1_df_dup_SEP_2020_match<-
  CONS_C1_df_dup_SEP_2020 %>% 
#:#:#:#:#:#:#:#:
  #sacar tratamientos truncados
  dplyr::filter(as.character(motivodeegreso_mod_imp) %in% c("Early Drop-out","Late Drop-out")) %>% 
#:#:#:#:#:#:#:#:
  #dplyr::mutate(tipo_de_plan_ambulatorio=ifelse(grepl("-PA",as.character(tipo_de_plan_2)),TRUE,FALSE)) %>% 
  dplyr::mutate(tipo_de_plan_res=if_else(as.character(tipo_de_plan_2) %in% c("M-PR","PG-PR"),TRUE,FALSE,NA)) %>% 
  dplyr::mutate(abandono_temprano_rec=ifelse(as.character(motivodeegreso_mod_imp)=="Early Drop-out",TRUE,FALSE)) %>% 
  dplyr::mutate(evaluacindelprocesoteraputico_min=ifelse(as.character(evaluacindelprocesoteraputico)=="3-Logro Minimo",TRUE,FALSE)) %>% 
  dplyr::mutate(tipo_centro_pub=if_else(as.character(tipo_centro)=="Public",TRUE,FALSE,NA)) %>% 
  dplyr::mutate(prev_treat=factor(dplyr::case_when(dup>=3~"2 or more",
                                                   TRUE~as.character(dup-1)))) %>% 
  dplyr::mutate(condicion_ocupacional_corr=factor(condicion_ocupacional_corr)) %>% 
  dplyr::select_(.dots = match.on_tot) %>% 
  data.table::data.table()
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
#CONS_C1_df_dup_SEP_2020_match %>% janitor::tabyl(prev_treat)
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp)

#monthly_dataset_comp_wise_dt<-monthly_dataset_comp_wise%>% dplyr::select_if(is.numeric)%>% dplyr::select(-stringency_ind_oxf,-owid_gbd_year)%>% dplyr::select(-ends_with("mth")) %>% data.frame()

#Computes a heterogenous correlation matrix, consisting of Pearson product-moment correlations between numeric variables, polyserial correlations between numeric and ordinal variables, and polychoric correlations between 
tiempo_antes_hetcor<-Sys.time()
hetcor_mat<-hetcor(CONS_C1_df_dup_SEP_2020_match[,-c("hash_key","row")], ML = T, std.err =T, use="pairwise.complete.obs", bins=3, pd=TRUE)
tiempo_despues_hetcor<-Sys.time()
tiempo_hetcor<-tiempo_despues_hetcor-tiempo_antes_hetcor
#umx_hetcor_mat<-umx::umxHetCor(CONS_C1_df_dup_SEP_2020_match[,-c("hash_key","row")], ML = T, use = c("pairwise.complete.obs"), treatAllAsFactor = FALSE, verbose = F)

#ttr(umx_hetcor_mat,"dimnames")[[2]][1]<-"Starting Substance"
#attr(umx_hetcor_mat,"dimnames")[[2]][2]<-"Marital Status"
#attr(umx_hetcor_mat,"dimnames")[[2]][3]<-"Educational Attainment"
#attr(umx_hetcor_mat,"dimnames")[[2]][4]<-"Occupational Status"
#attr(umx_hetcor_mat,"dimnames")[[2]][5]<-"Age of Onset of Drug Use"
#attr(umx_hetcor_mat,"dimnames")[[2]][6]<-"Frequency of use of primary drug"
#attr(umx_hetcor_mat,"dimnames")[[2]][7]<-"Motive of Admission to Treatment"
#attr(umx_hetcor_mat,"dimnames")[[2]][8]<-"Psychiatric comorbidity"
#attr(umx_hetcor_mat,"dimnames")[[2]][9]<-"Chilean Region of the Center"
#attr(umx_hetcor_mat,"dimnames")[[2]][10]<-"Type of Center (Public)"
#attr(umx_hetcor_mat,"dimnames")[[2]][11]<-"Early Dropout"
#attr(umx_hetcor_mat,"dimnames")[[2]][12]<-"Residential Type of Plan"
#attr(umx_hetcor_mat,"dimnames")[[2]][13]<-"Evaluation of the Therapeutic Process (Min. Achievement)"
#attr(umx_hetcor_mat,"dimnames")[[2]][14]<-"No. of Previous Treatments"
#attr(umx_hetcor_mat,"dimnames")[[2]][15]<-"Sex"
#attr(umx_hetcor_mat,"dimnames")[[2]][16]<-"Age at Admission"

#attr(umx_hetcor_mat,"dimnames")[[1]][1]<-"Starting Substance"
#attr(umx_hetcor_mat,"dimnames")[[1]][2]<-"Marital Status"
#attr(umx_hetcor_mat,"dimnames")[[1]][3]<-"Educational Attainment"
#attr(umx_hetcor_mat,"dimnames")[[1]][4]<-"Occupational Status"
#attr(umx_hetcor_mat,"dimnames")[[1]][5]<-"Age of Onset of Drug Use"
#attr(umx_hetcor_mat,"dimnames")[[1]][6]<-"Frequency of use of primary drug"
#attr(umx_hetcor_mat,"dimnames")[[1]][7]<-"Motive of Admission to Treatment"
#attr(umx_hetcor_mat,"dimnames")[[1]][8]<-"Psychiatric comorbidity"
#attr(umx_hetcor_mat,"dimnames")[[1]][9]<-"Chilean Region of the Center"
#attr(umx_hetcor_mat,"dimnames")[[1]][10]<-"Type of Center (Public)"
#attr(umx_hetcor_mat,"dimnames")[[1]][11]<-"Early Dropout"
#attr(umx_hetcor_mat,"dimnames")[[1]][12]<-"Residential Type of Plan"
#attr(umx_hetcor_mat,"dimnames")[[1]][13]<-"Evaluation of the Therapeutic Process (Min. Achievement)"
#attr(umx_hetcor_mat,"dimnames")[[1]][14]<-"No. of Previous Treatments"
#attr(umx_hetcor_mat,"dimnames")[[1]][15]<-"Sex"
#attr(umx_hetcor_mat,"dimnames")[[1]][16]<-"Age at Admission"

attr(hetcor_mat$correlations,"dimnames")[[2]][1]<-"Starting Substance"
attr(hetcor_mat$correlations,"dimnames")[[2]][2]<-"Marital Status"
attr(hetcor_mat$correlations,"dimnames")[[2]][3]<-"Educational Attainment"
attr(hetcor_mat$correlations,"dimnames")[[2]][4]<-"Occupational Status"
attr(hetcor_mat$correlations,"dimnames")[[2]][5]<-"Age of Onset of Drug Use"
attr(hetcor_mat$correlations,"dimnames")[[2]][6]<-"Frequency of use of primary drug"
attr(hetcor_mat$correlations,"dimnames")[[2]][7]<-"Motive of Admission to Treatment"
attr(hetcor_mat$correlations,"dimnames")[[2]][8]<-"Psychiatric comorbidity"
attr(hetcor_mat$correlations,"dimnames")[[2]][9]<-"Chilean Region of the Center"
attr(hetcor_mat$correlations,"dimnames")[[2]][10]<-"Type of Center (Public)"
attr(hetcor_mat$correlations,"dimnames")[[2]][11]<-"Early Dropout"
attr(hetcor_mat$correlations,"dimnames")[[2]][12]<-"Residential Type of Plan"
attr(hetcor_mat$correlations,"dimnames")[[2]][13]<-"Evaluation of the Therapeutic Process (Min. Achievement)"
attr(hetcor_mat$correlations,"dimnames")[[2]][14]<-"No. of Previous Treatments"
attr(hetcor_mat$correlations,"dimnames")[[2]][15]<-"Sex"
attr(hetcor_mat$correlations,"dimnames")[[2]][16]<-"Age at Admission"
attr(hetcor_mat$correlations,"dimnames")[[2]][17]<-"Date of Admission"

attr(hetcor_mat$correlations,"dimnames")[[1]][1]<-"Starting Substance"
attr(hetcor_mat$correlations,"dimnames")[[1]][2]<-"Marital Status"
attr(hetcor_mat$correlations,"dimnames")[[1]][3]<-"Educational Attainment"
attr(hetcor_mat$correlations,"dimnames")[[1]][4]<-"Occupational Status"
attr(hetcor_mat$correlations,"dimnames")[[1]][5]<-"Age of Onset of Drug Use"
attr(hetcor_mat$correlations,"dimnames")[[1]][6]<-"Frequency of use of primary drug"
attr(hetcor_mat$correlations,"dimnames")[[1]][7]<-"Motive of Admission to Treatment"
attr(hetcor_mat$correlations,"dimnames")[[1]][8]<-"Psychiatric comorbidity"
attr(hetcor_mat$correlations,"dimnames")[[1]][9]<-"Chilean Region of the Center"
attr(hetcor_mat$correlations,"dimnames")[[1]][10]<-"Type of Center (Public)"
attr(hetcor_mat$correlations,"dimnames")[[1]][11]<-"Early Dropout"
attr(hetcor_mat$correlations,"dimnames")[[1]][12]<-"Residential Type of Plan"
attr(hetcor_mat$correlations,"dimnames")[[1]][13]<-"Evaluation of the Therapeutic Process (Min. Achievement)"
attr(hetcor_mat$correlations,"dimnames")[[1]][14]<-"No. of Previous Treatments"
attr(hetcor_mat$correlations,"dimnames")[[1]][15]<-"Sex"
attr(hetcor_mat$correlations,"dimnames")[[1]][16]<-"Age at Admission"
attr(hetcor_mat$correlations,"dimnames")[[1]][17]<-"Date of Admission"

attr(hetcor_mat$tests,"dimnames")[[2]][1]<-"Starting Substance"
attr(hetcor_mat$tests,"dimnames")[[2]][2]<-"Marital Status"
attr(hetcor_mat$tests,"dimnames")[[2]][3]<-"Educational Attainment"
attr(hetcor_mat$tests,"dimnames")[[2]][4]<-"Occupational Status"
attr(hetcor_mat$tests,"dimnames")[[2]][5]<-"Age of Onset of Drug Use"
attr(hetcor_mat$tests,"dimnames")[[2]][6]<-"Frequency of use of primary drug"
attr(hetcor_mat$tests,"dimnames")[[2]][7]<-"Motive of Admission to Treatment"
attr(hetcor_mat$tests,"dimnames")[[2]][8]<-"Psychiatric comorbidity"
attr(hetcor_mat$tests,"dimnames")[[2]][9]<-"Chilean Region of the Center"
attr(hetcor_mat$tests,"dimnames")[[2]][10]<-"Type of Center (Public)"
attr(hetcor_mat$tests,"dimnames")[[2]][11]<-"Early Dropout"
attr(hetcor_mat$tests,"dimnames")[[2]][12]<-"Residential Type of Plan"
attr(hetcor_mat$tests,"dimnames")[[2]][13]<-"Evaluation of the Therapeutic Process (Min. Achievement)"
attr(hetcor_mat$tests,"dimnames")[[2]][14]<-"No. of Previous Treatments"
attr(hetcor_mat$tests,"dimnames")[[2]][15]<-"Sex"
attr(hetcor_mat$tests,"dimnames")[[2]][16]<-"Age at Admission"
attr(hetcor_mat$tests,"dimnames")[[2]][17]<-"Date of Admission"

attr(hetcor_mat$tests,"dimnames")[[1]][1]<-"Starting Substance"
attr(hetcor_mat$tests,"dimnames")[[1]][2]<-"Marital Status"
attr(hetcor_mat$tests,"dimnames")[[1]][3]<-"Educational Attainment"
attr(hetcor_mat$tests,"dimnames")[[1]][4]<-"Occupational Status"
attr(hetcor_mat$tests,"dimnames")[[1]][5]<-"Age of Onset of Drug Use"
attr(hetcor_mat$tests,"dimnames")[[1]][6]<-"Frequency of use of primary drug"
attr(hetcor_mat$tests,"dimnames")[[1]][7]<-"Motive of Admission to Treatment"
attr(hetcor_mat$tests,"dimnames")[[1]][8]<-"Psychiatric comorbidity"
attr(hetcor_mat$tests,"dimnames")[[1]][9]<-"Chilean Region of the Center"
attr(hetcor_mat$tests,"dimnames")[[1]][10]<-"Type of Center (Public)"
attr(hetcor_mat$tests,"dimnames")[[1]][11]<-"Early Dropout"
attr(hetcor_mat$tests,"dimnames")[[1]][12]<-"Residential Type of Plan"
attr(hetcor_mat$tests,"dimnames")[[1]][13]<-"Evaluation of the Therapeutic Process (Min. Achievement)"
attr(hetcor_mat$tests,"dimnames")[[1]][14]<-"No. of Previous Treatments"
attr(hetcor_mat$tests,"dimnames")[[1]][15]<-"Sex"
attr(hetcor_mat$tests,"dimnames")[[1]][16]<-"Age at Admission"
attr(hetcor_mat$tests,"dimnames")[[1]][17]<-"Date of Admission"

hetcor_mat$tests[is.na(hetcor_mat$tests)]<-1

ggcorrplot<-
ggcorrplot::ggcorrplot(hetcor_mat$correlations,
           ggtheme = ggplot2::theme_void,
           insig = "blank",
           pch=1,
           pch.cex=3,
           tl.srt = 45, 
           #pch="ns",
            p.mat = hetcor_mat$tests, #  replacement has 144 rows, data has 169
            #type = "lower",
           colors = c("#6D9EC1", "white", "#E46726"), 
           tl.cex=8,
           lab=F)+
  #scale_x_discrete(labels = var_lbls_p345, drop = F) +
  #scale_y_discrete(labels = var_lbls_p345, drop = F) +
  theme(axis.text.x = element_blank())+
  #theme(axis.text.y = element_text(size=7.5,color ="black", hjust = 1))+
  theme(axis.text.y = element_blank())+
  theme(legend.position="bottom")

#umx_ggcorrplot<-
#ggcorrplot::ggcorrplot(umx_hetcor_mat,
#           ggtheme = ggplot2::theme_void,
#           insig = "blank",
#           #pch=1,
#           pch.cex=3,
#           tl.srt = 45, 
#           pch="ns",
#          p.mat = hetcor_mat$tests, #  replacement has 144 rows, data has 169
#            #type = "lower",
#           colors = c("#6D9EC1", "white", "#E46726"), 
#           tl.cex=8,
#           lab=F)+
#  #scale_x_discrete(labels = var_lbls_p345, drop = F) +
#  #scale_y_discrete(labels = var_lbls_p345, drop = F) +
#  theme(axis.text.x = element_blank())+
#  #theme(axis.text.y = element_text(size=7.5,color ="black", hjust = 1))+
#  theme(axis.text.y = element_blank())+
#  theme(legend.position="bottom")

ggplotly(ggcorrplot, height = 800, width=800)%>% 
  layout(xaxis= list(showticklabels = FALSE)) %>% 
 layout(annotations = 
 list(x = .1, y = -0.031, text = "", 
      showarrow = F, xref='paper', yref='paper', 
      #xanchor='center', yanchor='auto', xshift=0, yshift=-0,
      font=list(size=11, color="darkblue"))
 )

Figure 2. Heterogeneous Correlation Matrix of Variables of Interest


Imputation


We generated a plot to see all the missing values in the sample.


#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:400px; overflow-x: scroll; width:100%">
library(dplyr)
library(ggplot2)

missing.values<-
CONS_C1_df_dup_SEP_2020_match %>%
  rowwise %>%
  dplyr::mutate_at(.vars = vars(sus_ini_mod_mvv:fech_ing_num),
                   .funs = ~ifelse(is.na(.), 1, 0)) %>% 
  dplyr::ungroup() %>% 
  dplyr::summarise_at(vars(sus_ini_mod_mvv:fech_ing_num),~sum(.))

plot_miss<-
missing.values %>%
  data.table::melt() %>% 
  dplyr::mutate(perc= value/sum(sel_treat_tabyl[,2])) %>% 
  dplyr::mutate(label_text= paste0("Variable= ",variable,"<br>n= ",value,"<br>",scales::percent(round(perc,3)))) %>%
  dplyr::mutate(perc=perc*100) %>% 
  ggplot() +
  geom_bar(aes(x=factor(variable), y=perc,label= label_text), stat = 'identity') +
  sjPlot::theme_sjplot()+
#  scale_y_continuous(limits=c(0,1), labels=percent)+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size=9))+
  labs(x=NULL, y="% of Missing Values", caption=paste0("Nota. Porcentaje de perdidos del total (",sum(sel_treat_tabyl[,2]),")"))
  ggplotly(plot_miss, tooltip = c("label_text"))%>% layout(xaxis= list(showticklabels = T), height = 600, width=800) %>%   layout(yaxis = list(tickformat='%',  range = c(0, 7)))

Figure 3. Bar plot of Porcentaje of Missing Values per Variables

  #</div>



From the figure above, we could see that the starting substance (sus_ini_mvv) and the onset of drug use (edad_ini_cons) had around 6% of missing data. These values should be imputed. We first focused on the age of onset of drug use.


attr(CONS_C1_df_dup_SEP_2020_match$sus_ini_mod_mvv,"label")<-"Starting Substance"
attr(CONS_C1_df_dup_SEP_2020_match$estado_conyugal_2,"label")<-"Marital Status"
attr(CONS_C1_df_dup_SEP_2020_match$escolaridad_rec,"label")<-"Educational Attainment"
attr(CONS_C1_df_dup_SEP_2020_match$condicion_ocupacional_corr,"label")<-"Occupational Status"
attr(CONS_C1_df_dup_SEP_2020_match$edad_ini_cons,"label")<-"Age of Onset of Drug Use"
attr(CONS_C1_df_dup_SEP_2020_match$freq_cons_sus_prin,"label")<-"Frequency of use of primary drug"
attr(CONS_C1_df_dup_SEP_2020_match$origen_ingreso_mod,"label")<-"Motive of Admission to Treatment"
attr(CONS_C1_df_dup_SEP_2020_match$nombre_region,"label")<-"Chilean Region of the Center"
attr(CONS_C1_df_dup_SEP_2020_match$tipo_centro_pub,"label")<-"Type of Center (Public)"
attr(CONS_C1_df_dup_SEP_2020_match$abandono_temprano_rec,"label")<-"Early Dropout"
attr(CONS_C1_df_dup_SEP_2020_match$tipo_de_plan_res,"label")<-"Residential Type of Plan"
attr(CONS_C1_df_dup_SEP_2020_match$evaluacindelprocesoteraputico_min,"label")<-"Evaluation of the Therapeutic Process (Min. Achievement)"
attr(CONS_C1_df_dup_SEP_2020_match$prev_treat,"label")<-"No. of Previous Treatments"
attr(CONS_C1_df_dup_SEP_2020_match$sexo_2,"label")<-"Sex"
attr(CONS_C1_df_dup_SEP_2020_match$edad_al_ing,"label")<-"Age at Admission"
attr(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,"label")<-"Date of Admission to Treatment (Numeric)"

#origen_ingreso #dg_global_nec_int_soc_or_1 "Diagnóstico global de necesidades de integración social" #evaluacindelprocesoteraputico "Evaluación del proceso terapéutico" #escolaridad_rec "macrozona"

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(evaluacindelprocesoteraputico) 
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(evaluacindelprocesoteraputico) 
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(nombre_region)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

library(Amelia)
CONS_C1_df_dup_SEP_2020_match<-
CONS_C1_df_dup_SEP_2020_match %>% 
    dplyr::group_by(hash_key) %>% 
    dplyr::mutate(rn=row_number()) %>% 
    dplyr::ungroup() %>% 
    data.table::data.table()

dataset_match_miss_sus_prin<-  
  CONS_C1_df_dup_SEP_2020 %>% 
    dplyr::filter(row %in% unlist(CONS_C1_df_dup_SEP_2020_match$row)) %>% 
      dplyr::select(row, edad_ini_sus_prin,via_adm_sus_prin_act,compromiso_biopsicosocial)
  
  #HACER BASE ESPECIAL QUE CONTENGA UNA VARIABLE DE EDAD DE INICIO DE CONSUMO DE SUSTANCIA PRINCIPAL PARA EQUIPARAR
  CONS_C1_df_dup_SEP_2020_match_miss<-
    CONS_C1_df_dup_SEP_2020_match %>%
    dplyr::left_join(dataset_match_miss_sus_prin,by="row") %>% 
    dplyr::mutate(condicion_ocupacional_corr=factor(condicion_ocupacional_corr))
  
amelia_fit <- amelia(CONS_C1_df_dup_SEP_2020_match_miss, 
                     m=30, parallel = "multicore", #noms = "row",
                     idvars=c("row"),#"hash_key","rn"
                     noms= c("sus_ini_mod_mvv", "estado_conyugal_2", "escolaridad_rec", "condicion_ocupacional_corr","via_adm_sus_prin_act", "freq_cons_sus_prin", "origen_ingreso_mod",  "nombre_region", "sexo_2"),
                     ords= c("dg_cie_10_rec", "prev_treat","compromiso_biopsicosocial"),
                     cs = "hash_key", ts = "rn")

#amelia_fit$imputations$imp1
#CONS_C1_df_dup_SEP_2020_match[!complete.cases(CONS_C1_df_dup_SEP_2020_match[,..match.on_tot])]

compare.density(amelia_fit,var="edad_ini_cons")
Figure 4. Bar plot of Porcentaje of Missing Values per Variables

Figure 4. Bar plot of Porcentaje of Missing Values per Variables


Based on the figure above, the age of onset of drug use was similar between the imputed values and the observed. However, there were two logical conditions to fulfill in order to replace adequately these values in the database: the age of onset must not be greater than the age of onset of drug use in the primary substance at admission, and may not be greater than the age of admission to treatment. We selected the minimum value of age of onset of drug use among the imputed, because one user could not have more than one age of onset of drug use.


#On this graph, a y = x line indicates the line of perfect agreement; that is, if the imputation model was a perfect predictor of the true value, all the imputations would fall on this line
no_mostrar=0
if(no_mostrar==1){
  res <- { 
    setTimeLimit(nn_K*500)
    ovr_imp_edad_ini_cons<-overimpute(amelia_fit, var = "edad_ini_cons")
  }
}
#Hay poca relación en las imputaciones.

# Ver si alguno de los usuarios con valores perdidos tiene de todas formas datos en esta variable.
edad_ini_sus_prin_for_imp<-
CONS_C1_df_dup_SEP_2020 %>% 
    dplyr::filter( hash_key %in% unlist(unique(CONS_C1_df_dup_SEP_2020_match[which(!complete.cases(CONS_C1_df_dup_SEP_2020_match$edad_ini_cons)),"hash_key"]))) %>%
    dplyr::filter(!is.na(edad_ini_sus_prin)) %>% 
    dplyr::group_by(hash_key) %>% 
    dplyr::summarise(min_edad_ini_sus_prin=min(edad_ini_sus_prin, na.rm=T), edad_al_ing_min=min(edad_al_ing,na.rm=T))

cumplimiento_errores<-data.frame()
for (i in paste0("imp",1:30)){
  rn_cum_err<-data.frame(amelia_fit$imputations[i]) %>% 
    dplyr::rename(hash_key = 2) %>% 
    dplyr::rename(edad_ini_cons = 7) %>% 
    dplyr::mutate(hash_key=as.character(hash_key)) %>% 
    dplyr::left_join(edad_ini_sus_prin_for_imp, by="hash_key") %>% 
    dplyr::filter(edad_al_ing_min<edad_ini_cons|edad_ini_cons>min_edad_ini_sus_prin) %>% # edad de inicio de consumo no debe ser mayor a la menor edad a la ingreso de cada usuario, y la mĆ­nima edad de inicio de consumo de sustancia principal es menor a la edad de inicio de consumo
    nrow()
  rn_cum_err<-cbind(i,rn_cum_err)
 cumplimiento_errores<- rbind(cumplimiento_errores,rn_cum_err)
}
colnames(cumplimiento_errores)<- c("imp","no_errors_age_of_onset_drug_use")

paste0("Number of users that had different age of onset of drug use before replacement: ",CONS_C1_df_dup_SEP_2020_match %>% 
    dplyr::group_by(hash_key) %>% 
    dplyr::mutate(n_dis=n_distinct(edad_ini_cons)) %>% 
    dplyr::ungroup() %>% 
    dplyr::filter(n_dis>1) %>% 
      nrow())
## [1] "Number of users that had different age of onset of drug use before replacement: 0"
n_miss_edad_ini_cons<-nrow(CONS_C1_df_dup_SEP_2020_match[which(!complete.cases(CONS_C1_df_dup_SEP_2020_match$edad_ini_cons)),"hash_key"])
plot_imps<-
cumplimiento_errores %>%
  dplyr::mutate(no_errors_age_of_onset_drug_use=as.numeric(no_errors_age_of_onset_drug_use)) %>% 
  dplyr::mutate(imp=factor(imp, levels =paste0("imp",1:30))) %>% 
  dplyr::mutate(perc= no_errors_age_of_onset_drug_use/n_miss_edad_ini_cons) %>% 
  dplyr::mutate(label_text= paste0("Variable= ",imp,"<br>n= ",no_errors_age_of_onset_drug_use,"<br>",scales::percent(round(perc,2)))) %>%
  dplyr::mutate(perc=perc*100) %>% 
  ggplot() +
  geom_bar(aes(x=imp, y=perc,label= label_text), stat = 'identity') +
  sjPlot::theme_sjplot()+
#  scale_y_continuous(limits=c(0,1), labels=percent)+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size=9))+
  labs(x=NULL, y="% of Values With Logical Discrepancies")+
  theme(aspect.ratio = 2/1)
  ggplotly(plot_imps, tooltip = c("label_text"))%>% layout(xaxis= list(showticklabels = T)) %>%   layout(yaxis = list(tickformat='%',  range = c(0, 40)), height = 600, width=800) 

Figure 5. Bar plot of Percentage of Incorrect Imputed Values per Imputation Sample

edad_ini_cons_imputed<-
  cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$edad_ini_cons,
       amelia_fit$imputations$imp2$edad_ini_cons,
       amelia_fit$imputations$imp3$edad_ini_cons,
       amelia_fit$imputations$imp4$edad_ini_cons,
       amelia_fit$imputations$imp5$edad_ini_cons,
       amelia_fit$imputations$imp6$edad_ini_cons,
       amelia_fit$imputations$imp7$edad_ini_cons,
       amelia_fit$imputations$imp8$edad_ini_cons,
       amelia_fit$imputations$imp9$edad_ini_cons,
       amelia_fit$imputations$imp10$edad_ini_cons,
       amelia_fit$imputations$imp11$edad_ini_cons,
       amelia_fit$imputations$imp12$edad_ini_cons,
       amelia_fit$imputations$imp13$edad_ini_cons,
       amelia_fit$imputations$imp14$edad_ini_cons,
       amelia_fit$imputations$imp15$edad_ini_cons,
       amelia_fit$imputations$imp16$edad_ini_cons,
       amelia_fit$imputations$imp17$edad_ini_cons,
       amelia_fit$imputations$imp18$edad_ini_cons,
       amelia_fit$imputations$imp19$edad_ini_cons,
       amelia_fit$imputations$imp20$edad_ini_cons,
       amelia_fit$imputations$imp21$edad_ini_cons,
       amelia_fit$imputations$imp22$edad_ini_cons,
       amelia_fit$imputations$imp23$edad_ini_cons,
       amelia_fit$imputations$imp24$edad_ini_cons,
       amelia_fit$imputations$imp25$edad_ini_cons,
       amelia_fit$imputations$imp26$edad_ini_cons,
       amelia_fit$imputations$imp27$edad_ini_cons,
       amelia_fit$imputations$imp28$edad_ini_cons,
       amelia_fit$imputations$imp29$edad_ini_cons,
       amelia_fit$imputations$imp30$edad_ini_cons
       ) 

edad_ini_cons_imputed<-cbind(data.table(edad_ini_cons_imputed[,1],data.table(edad_ini_cons_imputed[,2:31])[, AVG := rowMeans(.SD, na.rm=T)]))%>% 
  janitor::clean_names() %>%
 dplyr::mutate(Min = dplyr::select(., amelia_fit_imputations_imp1_edad_ini_cons:amelia_fit_imputations_imp30_edad_ini_cons) %>% purrr::reduce(pmin))
  

# Reemplazo los valores perdidos:
CONS_C1_df_dup_SEP_2020_match_miss1<-
CONS_C1_df_dup_SEP_2020_match_miss %>% 
  dplyr::left_join(edad_ini_cons_imputed,by=c("row"="v1")) %>% 
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(min_edad_al_ing=min(edad_al_ing,na.rm=T),min_edad_al_ing=ifelse(is.infinite(min_edad_al_ing), NA, min_edad_al_ing)) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(edad_ini_cons=dplyr::case_when(is.na(edad_ini_cons) & as.numeric(avg)<edad_ini_sus_prin & as.numeric(avg)<min_edad_al_ing~as.numeric(avg),is.na(edad_ini_cons) & as.numeric(Min)<edad_ini_sus_prin & as.numeric(Min)<min_edad_al_ing~as.numeric(Min),
  is.na(edad_ini_cons) & is.na(edad_ini_sus_prin) & avg<as.numeric(min_edad_al_ing)~as.numeric(avg),is.na(edad_ini_cons) & is.na(edad_ini_sus_prin) & Min<as.numeric(min_edad_al_ing)~as.numeric(Min),
                                               TRUE~as.numeric(edad_ini_cons)))

#is.na(edad_ini_cons) & is.na(edad_ini_sus_prin) & is.na(min_edad_al_ing)~as.numeric(avg),
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss1$edad_ini_cons))
paste0("Number of rows with values that did not fulfilled the conditions: ",CONS_C1_df_dup_SEP_2020_match_miss1 %>% 
    dplyr::filter(is.na(edad_ini_cons)) %>% 
    dplyr::select(hash_key, edad_ini_cons, min_edad_al_ing,edad_ini_sus_prin,min_edad_al_ing, avg, Min) %>% nrow())
## [1] "Number of rows with values that did not fulfilled the conditions: 8"
CONS_C1_df_dup_SEP_2020_match_miss1<-
CONS_C1_df_dup_SEP_2020_match_miss1 %>% 
    dplyr::group_by(hash_key) %>% 
    dplyr::mutate(n_dis=n_distinct(edad_ini_cons)) %>% 
    dplyr::ungroup() %>% 
    dplyr::group_by(hash_key) %>% 
  dplyr::mutate(min_edad_ini_cons=min(edad_ini_cons,na.rm=T), edad_ini_cons= dplyr::case_when(n_dis>1 & !is.na(min_edad_ini_cons)~min_edad_ini_cons,TRUE~edad_ini_cons))

paste0("Number of users that had different age of onset of drug use after replacement: ",CONS_C1_df_dup_SEP_2020_match_miss1 %>% 
    dplyr::group_by(hash_key) %>% 
    dplyr::mutate(n_dis=n_distinct(edad_ini_cons)) %>% 
    dplyr::ungroup() %>% 
    dplyr::filter(n_dis>1) %>% 
      nrow())
## [1] "Number of users that had different age of onset of drug use after replacement: 0"


There were 7 cases of imputed ages of onset of drug use that did not fulfilled the conditions necessary to replace the missing values with the imputed ones. We also looked over the


#On this graph, a y = x line indicates the line of perfect agreement; that is, if the imputation model was a perfect predictor of the true value, all the imputations would fall on this line
no_mostrar=0
if(no_mostrar==1){
  res <- { 
    setTimeLimit(nn_K*500)
    ovr_imp_edad_ini_cons<-overimpute(amelia_fit, var = "edad_al_ing")
  }
}
#Hay poca relación en las imputaciones.
#table(is.na(CONS_C1_df_dup_SEP_2020_match_not_miss$edad_al_ing),exclude=NULL)

edad_al_ing_imputed<-
  cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$edad_al_ing,
       amelia_fit$imputations$imp2$edad_al_ing,
       amelia_fit$imputations$imp3$edad_al_ing,
       amelia_fit$imputations$imp4$edad_al_ing,
       amelia_fit$imputations$imp5$edad_al_ing,
       amelia_fit$imputations$imp6$edad_al_ing,
       amelia_fit$imputations$imp7$edad_al_ing,
       amelia_fit$imputations$imp8$edad_al_ing,
       amelia_fit$imputations$imp9$edad_al_ing,
       amelia_fit$imputations$imp10$edad_al_ing,
       amelia_fit$imputations$imp11$edad_al_ing,
       amelia_fit$imputations$imp12$edad_al_ing,
       amelia_fit$imputations$imp13$edad_al_ing,
       amelia_fit$imputations$imp14$edad_al_ing,
       amelia_fit$imputations$imp15$edad_al_ing,
       amelia_fit$imputations$imp16$edad_al_ing,
       amelia_fit$imputations$imp17$edad_al_ing,
       amelia_fit$imputations$imp18$edad_al_ing,
       amelia_fit$imputations$imp19$edad_al_ing,
       amelia_fit$imputations$imp20$edad_al_ing,
       amelia_fit$imputations$imp21$edad_al_ing,
       amelia_fit$imputations$imp22$edad_al_ing,
       amelia_fit$imputations$imp23$edad_al_ing,
       amelia_fit$imputations$imp24$edad_al_ing,
       amelia_fit$imputations$imp25$edad_al_ing,
       amelia_fit$imputations$imp26$edad_al_ing,
       amelia_fit$imputations$imp27$edad_al_ing,
       amelia_fit$imputations$imp28$edad_al_ing,
       amelia_fit$imputations$imp29$edad_al_ing,
       amelia_fit$imputations$imp30$edad_al_ing
       ) 

edad_al_ing_imputed<-cbind(data.table(edad_al_ing_imputed[,1],data.table(edad_al_ing_imputed[,2:31])[, AVG_edad_ing := rowMeans(.SD, na.rm=T)]))%>% 
  janitor::clean_names() %>%
 mutate(Min_edad_ing = dplyr::select(., amelia_fit_imputations_imp1_edad_al_ing:amelia_fit_imputations_imp30_edad_al_ing) %>% purrr::reduce(pmin))
  

# Reemplazo los valores perdidos:
CONS_C1_df_dup_SEP_2020_match_miss12<-
CONS_C1_df_dup_SEP_2020_match_miss1 %>% 
  dplyr::left_join(edad_al_ing_imputed,by=c("row"="v1")) %>% 
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(min_edad_ini_cons=min(edad_ini_cons,na.rm=T),min_edad_ini_cons=ifelse(is.infinite(min_edad_ini_cons), NA, min_edad_ini_cons)) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(edad_al_ing=dplyr::case_when(is.na(edad_al_ing) & as.numeric(avg_edad_ing)>=min_edad_ini_cons & as.numeric(avg_edad_ing)>=edad_ini_sus_prin~as.numeric(avg_edad_ing),is.na(edad_al_ing) & as.numeric(Min_edad_ing)>=edad_ini_sus_prin & as.numeric(Min_edad_ing)>=edad_ini_sus_prin~as.numeric(Min_edad_ing),is.na(edad_al_ing) & is.na(edad_ini_sus_prin) & avg_edad_ing>=as.numeric(min_edad_ini_cons)~as.numeric(avg_edad_ing),is.na(edad_al_ing) & is.na(edad_ini_sus_prin) & Min_edad_ing>=as.numeric(min_edad_ini_cons)~as.numeric(Min_edad_ing),TRUE~as.numeric(edad_al_ing)))

no_mostrar=0
if(no_mostrar==1){
  try(
  CONS_C1_df_dup_SEP_2020_match_miss1 %>% 
      dplyr::left_join(edad_al_ing_imputed,by=c("row"="v1")) %>% 
      dplyr::group_by(hash_key) %>% 
      dplyr::mutate(min_edad_ini_cons=min(edad_ini_cons,na.rm=T),min_edad_ini_cons=ifelse(is.infinite(min_edad_ini_cons), NA, min_edad_ini_cons)) %>% 
      dplyr::ungroup() %>% 
      dplyr::filter(is.na(edad_al_ing)) %>% 
      dplyr::select(hash_key, rn, edad_al_ing, avg_edad_ing, Min_edad_ing,min_edad_ini_cons,edad_ini_sus_prin)
  )
}
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss12$edad_al_ing))


We selected the most vulnerable value among the candidates of imputations of the starting substance (First, Cocaine paste, Cocaine hydrochloride or snort cocaine, Marijuana, Alcohol, and Other).


# Ver distintos valores propuestos para sustancia de inciio
sus_ini_mod_mvv_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$sus_ini_mod_mvv,
       amelia_fit$imputations$imp2$sus_ini_mod_mvv,
       amelia_fit$imputations$imp3$sus_ini_mod_mvv,
       amelia_fit$imputations$imp4$sus_ini_mod_mvv,
       amelia_fit$imputations$imp5$sus_ini_mod_mvv,
       amelia_fit$imputations$imp6$sus_ini_mod_mvv,
       amelia_fit$imputations$imp7$sus_ini_mod_mvv,
       amelia_fit$imputations$imp8$sus_ini_mod_mvv,
       amelia_fit$imputations$imp9$sus_ini_mod_mvv,
       amelia_fit$imputations$imp10$sus_ini_mod_mvv,
       amelia_fit$imputations$imp11$sus_ini_mod_mvv,
       amelia_fit$imputations$imp12$sus_ini_mod_mvv,
       amelia_fit$imputations$imp13$sus_ini_mod_mvv,
       amelia_fit$imputations$imp14$sus_ini_mod_mvv,
       amelia_fit$imputations$imp15$sus_ini_mod_mvv,
       amelia_fit$imputations$imp16$sus_ini_mod_mvv,
       amelia_fit$imputations$imp17$sus_ini_mod_mvv,
       amelia_fit$imputations$imp18$sus_ini_mod_mvv,
       amelia_fit$imputations$imp19$sus_ini_mod_mvv,
       amelia_fit$imputations$imp20$sus_ini_mod_mvv,
       amelia_fit$imputations$imp21$sus_ini_mod_mvv,
       amelia_fit$imputations$imp22$sus_ini_mod_mvv,
       amelia_fit$imputations$imp23$sus_ini_mod_mvv,
       amelia_fit$imputations$imp24$sus_ini_mod_mvv,
       amelia_fit$imputations$imp25$sus_ini_mod_mvv,
       amelia_fit$imputations$imp26$sus_ini_mod_mvv,
       amelia_fit$imputations$imp27$sus_ini_mod_mvv,
       amelia_fit$imputations$imp28$sus_ini_mod_mvv,
       amelia_fit$imputations$imp29$sus_ini_mod_mvv,
       amelia_fit$imputations$imp30$sus_ini_mod_mvv
       ) 

sus_ini_mod_mvv_imputed<-
sus_ini_mod_mvv_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Marijuana",as.character(.))~1,TRUE~0), .names="mar_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Alcohol",as.character(.))~1,TRUE~0), .names="oh_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Cocaine paste",as.character(.))~1,TRUE~0), .names="pb_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Cocaine hydrochloride",as.character(.))~1,TRUE~0), .names="coc_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Other",as.character(.))~1,TRUE~0), .names="otr_{col}"))%>%
        dplyr::mutate(sus_ini_mod_mvv_mar = base::rowSums(dplyr::select(., starts_with("mar_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_oh = base::rowSums(dplyr::select(., starts_with("oh_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_pb = base::rowSums(dplyr::select(., starts_with("pb_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_coc = base::rowSums(dplyr::select(., starts_with("coc_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_otr = base::rowSums(dplyr::select(., starts_with("otr_")))) %>% 
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_mar>0~1,TRUE~0)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_oh>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_pb>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_coc>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_otr>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_to_imputation=dplyr::case_when(sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_pb>0~"Cocaine paste",
                                                               sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_coc>0~"Cocaine hydrochloride",
                                                               sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_mar>0~"Marijuana",
                                                               sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_oh>0~"Alcohol",
                                                               sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_otr>0~"Other",
                                                                
                                                               sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_pb>0~"Cocaine paste",
                                                               sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_coc>0~"Cocaine hydrochloride",
                                                                sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_mar>0~"Marijuana",
                                                                sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_oh>0~"Alcohol",
                                                                sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_otr>0~"Other")) %>% 
  janitor::clean_names()

sus_ini_mod_mvv_imputed<-
dplyr::select(sus_ini_mod_mvv_imputed,amelia_fit_imputations_imp1_row,sus_ini_mod_mvv_to_imputation)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss2<-
CONS_C1_df_dup_SEP_2020_match_miss1 %>% 
   dplyr::left_join(sus_ini_mod_mvv_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(sus_ini_mod_mvv=factor(dplyr::case_when(is.na(sus_ini_mod_mvv)~as.character(sus_ini_mod_mvv_to_imputation),
                                 TRUE~as.character(sus_ini_mod_mvv)))) %>% 
  data.table()
 
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#


Another variable that is worth imputing is the Frequency of use of primary drug at admission. We selected the imputed values with the value with the most frequent drug use.


# Ver distintos valores propuestos para sustancia de inciio
freq_cons_sus_prin_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$freq_cons_sus_prin,
       amelia_fit$imputations$imp2$freq_cons_sus_prin,
       amelia_fit$imputations$imp3$freq_cons_sus_prin,
       amelia_fit$imputations$imp4$freq_cons_sus_prin,
       amelia_fit$imputations$imp5$freq_cons_sus_prin,
       amelia_fit$imputations$imp6$freq_cons_sus_prin,
       amelia_fit$imputations$imp7$freq_cons_sus_prin,
       amelia_fit$imputations$imp8$freq_cons_sus_prin,
       amelia_fit$imputations$imp9$freq_cons_sus_prin,
       amelia_fit$imputations$imp10$freq_cons_sus_prin,
       amelia_fit$imputations$imp11$freq_cons_sus_prin,
       amelia_fit$imputations$imp12$freq_cons_sus_prin,
       amelia_fit$imputations$imp13$freq_cons_sus_prin,
       amelia_fit$imputations$imp14$freq_cons_sus_prin,
       amelia_fit$imputations$imp15$freq_cons_sus_prin,
       amelia_fit$imputations$imp16$freq_cons_sus_prin,
       amelia_fit$imputations$imp17$freq_cons_sus_prin,
       amelia_fit$imputations$imp18$freq_cons_sus_prin,
       amelia_fit$imputations$imp19$freq_cons_sus_prin,
       amelia_fit$imputations$imp20$freq_cons_sus_prin,
       amelia_fit$imputations$imp21$freq_cons_sus_prin,
       amelia_fit$imputations$imp22$freq_cons_sus_prin,
       amelia_fit$imputations$imp23$freq_cons_sus_prin,
       amelia_fit$imputations$imp24$freq_cons_sus_prin,
       amelia_fit$imputations$imp25$freq_cons_sus_prin,
       amelia_fit$imputations$imp26$freq_cons_sus_prin,
       amelia_fit$imputations$imp27$freq_cons_sus_prin,
       amelia_fit$imputations$imp28$freq_cons_sus_prin,
       amelia_fit$imputations$imp29$freq_cons_sus_prin,
       amelia_fit$imputations$imp30$freq_cons_sus_prin
       ) 

freq_cons_sus_prin_imputed<-
freq_cons_sus_prin_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("1 day a week or more",as.character(.))~1,TRUE~0), .names="1_day_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("2 to 3 days a week",as.character(.))~1,TRUE~0), .names="2_3_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("4 to 6 days a week",as.character(.))~1,TRUE~0), .names="4_6_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Less than 1 day a week",as.character(.))~1,TRUE~0), .names="less_1_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Did not use",as.character(.))~1,TRUE~0), .names="did_not_{col}"))%>%
    dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Daily",as.character(.))~1,TRUE~0), .names="daily_{col}"))%>%
  dplyr::mutate(freq_cons_sus_prin_daily = base::rowSums(dplyr::select(., starts_with("daily_")))) %>% 
  dplyr::mutate(freq_cons_sus_prin_4_6 = base::rowSums(dplyr::select(., starts_with("4_6_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_2_3 = base::rowSums(dplyr::select(., starts_with("2_3_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_1_day = base::rowSums(dplyr::select(., starts_with("1_day_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_less_1 = base::rowSums(dplyr::select(., starts_with("less_1_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_did_not = base::rowSums(dplyr::select(., starts_with("did_not_")))) %>% 
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_1_day>0~1,TRUE~0)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_2_3>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_4_6>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_less_1>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_did_not>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_daily>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  #hierarchy
  dplyr::mutate(freq_cons_sus_prin_to_imputation=
                  dplyr::case_when(freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_daily>0~"Daily",
                                     freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",
                                     freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",
                                     freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",
                                     freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",
                                     freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_did_not>0~"Did not use",
                                                                
                                     freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_daily>0~"Daily",
                                     freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",
                                     freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",
                                     freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",
                                     freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",
                                     freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_did_not>0~"Did not use"
                                   )) %>% 
  janitor::clean_names()

freq_cons_sus_prin_imputed<-
dplyr::select(freq_cons_sus_prin_imputed,amelia_fit_imputations_imp1_row,freq_cons_sus_prin_to_imputation)


#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss3<-
CONS_C1_df_dup_SEP_2020_match_miss2 %>% 
   dplyr::left_join(freq_cons_sus_prin_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(freq_cons_sus_prin=factor(dplyr::case_when(is.na(freq_cons_sus_prin)~as.character(freq_cons_sus_prin_to_imputation), TRUE~as.character(freq_cons_sus_prin)))) %>% 
  data.table()


Another variable that is worth imputing is the Educational Attainment. We were particularly cautious to impute attainments that would follow a progression from primary school to more than high school.


# Ver distintos valores propuestos para sustancia de inciio
escolaridad_rec_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
                  amelia_fit$imputations$imp1$hash_key,
                  amelia_fit$imputations$imp1$fech_ing_num,
                  amelia_fit$imputations$imp1$escolaridad_rec,
                  amelia_fit$imputations$imp2$escolaridad_rec,
                  amelia_fit$imputations$imp3$escolaridad_rec,
                  amelia_fit$imputations$imp4$escolaridad_rec,
                  amelia_fit$imputations$imp5$escolaridad_rec,
                  amelia_fit$imputations$imp6$escolaridad_rec,
                  amelia_fit$imputations$imp7$escolaridad_rec,
                  amelia_fit$imputations$imp8$escolaridad_rec,
                  amelia_fit$imputations$imp9$escolaridad_rec,
                  amelia_fit$imputations$imp10$escolaridad_rec,
                  amelia_fit$imputations$imp11$escolaridad_rec,
                  amelia_fit$imputations$imp12$escolaridad_rec,
                  amelia_fit$imputations$imp13$escolaridad_rec,
                  amelia_fit$imputations$imp14$escolaridad_rec,
                  amelia_fit$imputations$imp15$escolaridad_rec,
                  amelia_fit$imputations$imp16$escolaridad_rec,
                  amelia_fit$imputations$imp17$escolaridad_rec,
                  amelia_fit$imputations$imp18$escolaridad_rec,
                  amelia_fit$imputations$imp19$escolaridad_rec,
                  amelia_fit$imputations$imp20$escolaridad_rec,
                  amelia_fit$imputations$imp21$escolaridad_rec,
                  amelia_fit$imputations$imp22$escolaridad_rec,
                  amelia_fit$imputations$imp23$escolaridad_rec,
                  amelia_fit$imputations$imp24$escolaridad_rec,
                  amelia_fit$imputations$imp25$escolaridad_rec,
                  amelia_fit$imputations$imp26$escolaridad_rec,
                  amelia_fit$imputations$imp27$escolaridad_rec,
                  amelia_fit$imputations$imp28$escolaridad_rec,
                  amelia_fit$imputations$imp29$escolaridad_rec,
                  amelia_fit$imputations$imp30$escolaridad_rec,
                  amelia_fit$imputations$imp30$rn
               ) 

escolaridad_rec_imputed<-
escolaridad_rec_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("3-Completed primary school or less",as.character(.))~1,TRUE~0), .names="3_primary_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("2-Completed high school or less",as.character(.))~1,TRUE~0), .names="2_high_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("1-More than high school",as.character(.))~1,TRUE~0), .names="1_more_high_{col}"))%>%

  dplyr::mutate(escolaridad_rec_3_primary = base::rowSums(dplyr::select(., contains("3_primary_")))) %>% 
  dplyr::mutate(escolaridad_rec_2_high = base::rowSums(dplyr::select(., contains("2_high_"))))%>%
  dplyr::mutate(escolaridad_rec_1_more_high = base::rowSums(dplyr::select(., contains("1_more_high_"))))%>%
  
  dplyr::left_join(cbind.data.frame(CONS_C1_df_dup_SEP_2020_match$row, CONS_C1_df_dup_SEP_2020_match$escolaridad_rec),by=c("amelia_fit.imputations.imp1.row"="CONS_C1_df_dup_SEP_2020_match$row")) %>%
  dplyr::rename("escolaridad_rec_original"="CONS_C1_df_dup_SEP_2020_match$escolaridad_rec") %>%
  dplyr::mutate(escolaridad_rec_original=as.numeric(substr(escolaridad_rec_original, 1, 1))) %>%
  dplyr::arrange(amelia_fit.imputations.imp1.hash_key,amelia_fit.imputations.imp30.rn) %>% 
  dplyr::group_by(amelia_fit.imputations.imp1.hash_key) %>%  
  dplyr::mutate(siguiente_escolaridad_rec_original=lead(escolaridad_rec_original), 
                subsig_escolaridad_rec_original=lead(escolaridad_rec_original,n =2), 
                rn=max(amelia_fit.imputations.imp30.rn),
                n_na_esc_or=is.na(escolaridad_rec_original),
                sum_n_na_esc_or=sum(n_na_esc_or,na.rm=T),
                max_sum_n_na_esc_or=max(n_na_esc_or,na.rm=T)
                ) %>% 
#dplyr::select(amelia_fit.imputations.imp1.hash_key,amelia_fit.imputations.imp30.rn,
#              siguiente_escolaridad_rec_original,escolaridad_rec_original,amelia_fit.imputations.imp1.fech_ing_num)%>% View()
  dplyr::ungroup() %>% 
#dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(escolaridad_rec_tot=dplyr::case_when(escolaridad_rec_3_primary>0~1,TRUE~0)) %>% 
  dplyr::mutate(escolaridad_rec_tot=dplyr::case_when(escolaridad_rec_2_high>0~escolaridad_rec_tot+1,TRUE~escolaridad_rec_tot)) %>% 
  dplyr::mutate(escolaridad_rec_tot=dplyr::case_when(escolaridad_rec_1_more_high>0~escolaridad_rec_tot+1,TRUE~escolaridad_rec_tot)) %>% 
  #hierarchy
  dplyr::mutate(escolaridad_rec_to_imputation=
# 1A) TIENE MƁS DE UN TRAT POR USUARIO (RN) Y HAY MAS DE UN VALOR DE IMPUTACION PARA ESCOLARIDAD
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  dplyr::case_when(
    escolaridad_rec_tot>1 &
    max_sum_n_na_esc_or==1 &
    escolaridad_rec_3_primary>0 &
    rn>1 & !is.na(siguiente_escolaridad_rec_original) &
    siguiente_escolaridad_rec_original<=3 ~"3-Completed primary school or less",
    
    escolaridad_rec_tot>1 & 
    max_sum_n_na_esc_or==1 &
    escolaridad_rec_2_high>0 & 
    rn>1 & !is.na(siguiente_escolaridad_rec_original) &
    siguiente_escolaridad_rec_original<=2 ~"2-Completed high school or less",
    
    escolaridad_rec_tot>1 & #hay mÔs de un valor de imputación para escolaridad
    max_sum_n_na_esc_or==1 & #no hay mƔs de un valor de escolaridad a imputar
    escolaridad_rec_1_more_high >0 & #el valor que tiene en este caso es mƔs de high school
    rn>1 & !is.na(siguiente_escolaridad_rec_original) & #tiene mƔs de un trat por usuario y no hay siguiente escolaridad perdida
    siguiente_escolaridad_rec_original==1~"1-More than high school", #si ya tenƭa mƔs de universitario, debƭa tener el mismo valor en el siguiente,
    # 1A2) SI HAY UN PERDIDO EN EL SIGUIENTE TRATAMIENTO, PERO EL SUBSIGUIENTE Sƍ TIENE UN VALOR
    #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
    escolaridad_rec_tot>1 &
    max_sum_n_na_esc_or==1 &
    escolaridad_rec_3_primary>0 &
    rn>1 & is.na(siguiente_escolaridad_rec_original) &
    subsig_escolaridad_rec_original<=3 ~"3-Completed primary school or less",
    
    escolaridad_rec_tot>1 & 
    max_sum_n_na_esc_or==1 &
    escolaridad_rec_2_high>0 & 
    rn>1 & is.na(siguiente_escolaridad_rec_original) &
    subsig_escolaridad_rec_original<=2 ~"2-Completed high school or less",
    
    escolaridad_rec_tot>1 & #hay mÔs de un valor de imputación para escolaridad
    max_sum_n_na_esc_or==1 & #no hay mƔs de un valor de escolaridad a imputar
    escolaridad_rec_1_more_high >0 & #el valor que tiene en este caso es mƔs de high school
    rn>1 & is.na(siguiente_escolaridad_rec_original) &
    subsig_escolaridad_rec_original==1~"1-More than high school", #si ya tenƭa mƔs de universitario, debƭa tener el mismo valor en el siguiente,
    # 1A3) SI HAY UN PERDIDO EN EL SIGUIENTE TRATAMIENTO, PERO EL SUBSIGUIENTE NO TIENE UN VALOR (QUIZAS PORQUE NO EXISTE INCLUSO)
    #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
    escolaridad_rec_tot>1 &
    max_sum_n_na_esc_or==1 &
    escolaridad_rec_3_primary>0 &
    rn>1 & is.na(siguiente_escolaridad_rec_original) &
    is.na(subsig_escolaridad_rec_original) ~"3-Completed primary school or less",
    
    escolaridad_rec_tot>1 & 
    max_sum_n_na_esc_or==1 &
    escolaridad_rec_2_high>0 & 
    rn>1 & is.na(siguiente_escolaridad_rec_original) &
    is.na(subsig_escolaridad_rec_original) ~"2-Completed high school or less",
    
    escolaridad_rec_tot>1 & #hay mÔs de un valor de imputación para escolaridad
    max_sum_n_na_esc_or==1 & #no hay mƔs de un valor de escolaridad a imputar
    escolaridad_rec_1_more_high >0 & #el valor que tiene en este caso es mƔs de high school
    rn>1 & is.na(siguiente_escolaridad_rec_original) &
    is.na(subsig_escolaridad_rec_original)~"1-More than high school", #si ya tenƭa mƔs de universitario, debƭa tener el mismo valor en el siguiente,
    
    # 1B) TIENE MƁS DE UN TRAT POR USUARIO (RN) Y NO HAY MAS DE UN VALOR DE IMPUTACION PARA ESCOLARIDAD
    #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
    
    escolaridad_rec_tot==1 &
    max_sum_n_na_esc_or==1 &
    escolaridad_rec_3_primary>0 &
    rn>1 & !is.na(siguiente_escolaridad_rec_original) &
    siguiente_escolaridad_rec_original<=3 ~"3-Completed primary school or less",
    
    escolaridad_rec_tot==1 & 
    max_sum_n_na_esc_or==1 &
    escolaridad_rec_2_high>0 & 
    rn>1 & !is.na(siguiente_escolaridad_rec_original) &
    siguiente_escolaridad_rec_original<=2 ~"2-Completed high school or less",
    
    escolaridad_rec_tot==1 & #no hay mÔs de un valor de imputación para escolaridad
    max_sum_n_na_esc_or==1 & #no hay mƔs de un valor de escolaridad a imputar
    escolaridad_rec_1_more_high>0 & #el valor que tiene en este caso es mƔs de high school
    rn>1 & !is.na(siguiente_escolaridad_rec_original) & #tiene mƔs de un trat por usuario y no hay siguiente escolaridad perdida
    siguiente_escolaridad_rec_original==1~"1-More than high school", #si ya tenƭa mƔs de universitario, debƭa tener el mismo valor en el siguiente,
    
    # 2A) TIENE SOLO UN TRAT POR USUARIO (RN) Y HAY MAS DE UN VALOR DE IMPUTACION PARA ESCOLARIDAD
    #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
    escolaridad_rec_tot>1 &
    max_sum_n_na_esc_or==1 &
    escolaridad_rec_3_primary>0 &
    rn==1 ~"3-Completed primary school or less",
    
    escolaridad_rec_tot>1 & 
    max_sum_n_na_esc_or==1 &
    escolaridad_rec_2_high>0 & 
    rn==1 ~"2-Completed high school or less",
    
    escolaridad_rec_tot>1 & #hay mÔs de un valor de imputación para escolaridad
    max_sum_n_na_esc_or==1 & #no hay mƔs de un valor de escolaridad a imputar
    escolaridad_rec_1_more_high>0 & #el valor que tiene en este caso es mƔs de high school
    rn==1 ~"1-More than high school", #si ya tenƭa mƔs de universitario, debƭa tener el mismo valor en el siguiente,
    
    # 2B) TIENE SOLO UN TRAT POR USUARIO (RN) Y NO HAY MAS DE UN VALOR DE IMPUTACION PARA ESCOLARIDAD
    #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
    
    escolaridad_rec_tot==1 &
    max_sum_n_na_esc_or==1 &
    escolaridad_rec_3_primary>0 &
    rn==1 ~"3-Completed primary school or less",
    
    escolaridad_rec_tot==1 & 
    max_sum_n_na_esc_or==1 &
    escolaridad_rec_2_high>0 & 
    rn==1 ~"2-Completed high school or less",
    
    escolaridad_rec_tot==1 & #no hay mÔs de un valor de imputación para escolaridad
    max_sum_n_na_esc_or==1 & #no hay mƔs de un valor de escolaridad a imputar
    escolaridad_rec_1_more_high>0 & #el valor que tiene en este caso es mƔs de high school
    rn==1 ~"1-More than high school" 
  )) %>% 
janitor::clean_names()

escolaridad_rec_imputed<-
dplyr::select(escolaridad_rec_imputed,amelia_fit_imputations_imp1_row,escolaridad_rec_to_imputation)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss4<-
CONS_C1_df_dup_SEP_2020_match_miss3 %>% 
   dplyr::left_join(escolaridad_rec_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(no_info_on_esc=dplyr::case_when(is.na(escolaridad_rec)~1,TRUE~0)) %>% 
    dplyr::mutate(escolaridad_rec=factor(dplyr::case_when(is.na(escolaridad_rec)~as.character(escolaridad_rec_to_imputation), TRUE~as.character(escolaridad_rec)))) %>% 
    dplyr::arrange(hash_key,rn) %>% 
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(escolaridad_rec_num=as.numeric(substr(escolaridad_rec, 1, 1)),
                sig_escolaridad_rec_num=lead(escolaridad_rec_num),
                ant_escolaridad_rec_num=lag(escolaridad_rec_num)) %>% 
  dplyr::mutate(escolaridad_rec=factor(dplyr::case_when(#si el anterior es menos vulnerable que el presente, NA
  escolaridad_rec_num>ant_escolaridad_rec_num~NA_character_, TRUE~as.character(escolaridad_rec)))) %>% 
  dplyr::mutate(escolaridad_rec=factor(dplyr::case_when(# si es mƔs vulnerable que el presente, es NA 
  escolaridad_rec_num<sig_escolaridad_rec_num~NA_character_, TRUE~as.character(escolaridad_rec)))) %>% 
  dplyr::ungroup() %>% 
  data.table()

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
paste0("Check inconsistencies with posterior educational attainments (0= No inconsistencies): ",CONS_C1_df_dup_SEP_2020_match_miss4 %>% 
  dplyr::arrange(hash_key,rn) %>% 
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(escolaridad_rec_num=as.numeric(substr(escolaridad_rec, 1, 1)),
                sig_escolaridad_rec_num=lead(escolaridad_rec_num),
                ant_escolaridad_rec_num=lag(escolaridad_rec_num)) %>% 
  dplyr::ungroup() %>% 
  dplyr::filter(escolaridad_rec_num>ant_escolaridad_rec_num) %>% 
  dplyr::select(hash_key,rn,fech_ing_num, escolaridad_rec, escolaridad_rec_num, sig_escolaridad_rec_num,ant_escolaridad_rec_num,no_info_on_esc) %>% 
  nrow())
## [1] "Check inconsistencies with posterior educational attainments (0= No inconsistencies): 0"
paste0("Problematic rows of users with inconsistent educational attainment that were declared as missing: ",CONS_C1_df_dup_SEP_2020_match_miss4 %>%
  dplyr::filter(hash_key %in% c("14b66a29bfc79b06c9e9a8c198259b07",
                                "16dc6c5edd96af2171127c760e8480ab",
                                "18b1f9646a2cd6bebd962637cff0a21a",
                                "296c5b02088d8d42eaa25b69e3c5bdc1",
                                "309a5876b3bd6c1eb7a326d31acc2895",
                                "35f69f3fd0dda5eab952983dd39618e2",
                                "374745c85601976177fe614a7370e475",
                                "3d722d03aaa88c6ad4c039c860c1b837",
                                "428f31fc7b7f6da87f0e8590f6e147b7",
                                "4b27553c38e707a6fda01855b784cf66",
                                "520337f7e5e3ae2e27b6e4711d4744aa",
                                "56d93af7846b383ae60019c2980ab73e",
                                "5db98f93254e218ec89689241de2f61b",
                                "6c1593d4035a5ca371148c8a19ccb30b",
                                "7cb913f6fd959aace18df3fa0fa7079e",
                                "98d6644d995ea2c8777a683160728004",
                                "9e06871d982546d078729e6154d285ba",
                                "affb5fe42db45203a63e13c54ba7551b",
                                "b715d04a584dbdd450fd6f2ea68291fc",
                                "e099bd5828923c95a1ed2878a09c1118",
                                "ed31e09b0adeefb57fa54c2295b3b7f2",
                                "f273b4cdc0cf4f046811c208162571ae",
                                "f7c1eaee409f6b1b70f64e565a616942"
  )) %>% 
  #dplyr::select(hash_key,dup, escolaridad_rec) %>% 
  dplyr::select(hash_key,rn,fech_ing_num,escolaridad_rec,escolaridad_rec_num,sig_escolaridad_rec_num,ant_escolaridad_rec_num) %>%nrow())
## [1] "Problematic rows of users with inconsistent educational attainment that were declared as missing: 38"


We ended having 22 missing values in educational attainment, because these values did not fulfilled the requirements of progression of the educational attainment (eg., a user could not respond to have completed secondary school, but then answer that he had completed primary school only).


Additionally, we replaced missing values of the marital status. Since different marital status were not particularly more vulnerable between each other, we selected the most frequent imputed value among the different imputed databases.


# Ver distintos valores propuestos para estado conyugal
estado_conyugal_2_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$estado_conyugal_2,
       amelia_fit$imputations$imp2$estado_conyugal_2,
       amelia_fit$imputations$imp3$estado_conyugal_2,
       amelia_fit$imputations$imp4$estado_conyugal_2,
       amelia_fit$imputations$imp5$estado_conyugal_2,
       amelia_fit$imputations$imp6$estado_conyugal_2,
       amelia_fit$imputations$imp7$estado_conyugal_2,
       amelia_fit$imputations$imp8$estado_conyugal_2,
       amelia_fit$imputations$imp9$estado_conyugal_2,
       amelia_fit$imputations$imp10$estado_conyugal_2,
       amelia_fit$imputations$imp11$estado_conyugal_2,
       amelia_fit$imputations$imp12$estado_conyugal_2,
       amelia_fit$imputations$imp13$estado_conyugal_2,
       amelia_fit$imputations$imp14$estado_conyugal_2,
       amelia_fit$imputations$imp15$estado_conyugal_2,
       amelia_fit$imputations$imp16$estado_conyugal_2,
       amelia_fit$imputations$imp17$estado_conyugal_2,
       amelia_fit$imputations$imp18$estado_conyugal_2,
       amelia_fit$imputations$imp19$estado_conyugal_2,
       amelia_fit$imputations$imp20$estado_conyugal_2,
       amelia_fit$imputations$imp21$estado_conyugal_2,
       amelia_fit$imputations$imp22$estado_conyugal_2,
       amelia_fit$imputations$imp23$estado_conyugal_2,
       amelia_fit$imputations$imp24$estado_conyugal_2,
       amelia_fit$imputations$imp25$estado_conyugal_2,
       amelia_fit$imputations$imp26$estado_conyugal_2,
       amelia_fit$imputations$imp27$estado_conyugal_2,
       amelia_fit$imputations$imp28$estado_conyugal_2,
       amelia_fit$imputations$imp29$estado_conyugal_2,
       amelia_fit$imputations$imp30$estado_conyugal_2
       ) 

estado_conyugal_2_imputed<-
estado_conyugal_2_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Married/Shared living arrangements",as.character(.))~1,TRUE~0), .names="married_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Separated/Divorced",as.character(.))~1,TRUE~0), .names="sep_div_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Single",as.character(.))~1,TRUE~0), .names="singl_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Widower",as.character(.))~1,TRUE~0), .names="widow_{col}"))%>%
 
  dplyr::mutate(estado_conyugal_2_married = base::rowSums(dplyr::select(., starts_with("married_"))))%>%
  dplyr::mutate(estado_conyugal_2_sep_div = base::rowSums(dplyr::select(., starts_with("sep_div_"))))%>%
  dplyr::mutate(estado_conyugal_2_singl = base::rowSums(dplyr::select(., starts_with("singl_"))))%>%
  dplyr::mutate(estado_conyugal_2_wid = base::rowSums(dplyr::select(., starts_with("widow_"))))%>%
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_married>0~1,TRUE~0)) %>% 
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_sep_div>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>% 
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_singl>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>% 
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_wid>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>% 
  janitor::clean_names()
  
estado_conyugal_2_imputed_cat_est_cony<-  
    estado_conyugal_2_imputed %>%
        tidyr::pivot_longer(c(estado_conyugal_2_married, estado_conyugal_2_sep_div, estado_conyugal_2_singl, estado_conyugal_2_wid), names_to = "cat_est_conyugal", values_to = "count") %>%
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(estado_conyugal_2_imputed_max=max(count,na.rm=T)) %>% 
        dplyr::ungroup() %>% 
        dplyr::filter(estado_conyugal_2_imputed_max==count) %>% 
        dplyr::select(amelia_fit_imputations_imp1_row,cat_est_conyugal,count) %>% 
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(n_row=n()) %>% 
        dplyr::ungroup() %>% 
        dplyr::mutate(cat_est_conyugal=dplyr::case_when(n_row>1~NA_character_,
                                                        TRUE~cat_est_conyugal)) %>% 
        dplyr::distinct(amelia_fit_imputations_imp1_row,.keep_all = T)
  
estado_conyugal_2_imputed<-
  estado_conyugal_2_imputed %>% 
    dplyr::left_join(estado_conyugal_2_imputed_cat_est_cony, by="amelia_fit_imputations_imp1_row") %>%
    dplyr::mutate(cat_est_conyugal=dplyr::case_when(cat_est_conyugal=="estado_conyugal_2_married"~"Married/Shared living arrangements",cat_est_conyugal=="estado_conyugal_2_sep_div"~"Separated/Divorced",cat_est_conyugal=="estado_conyugal_2_singl"~"Single",cat_est_conyugal=="estado_conyugal_2_wid"~"Widower"
    ))%>% 
  janitor::clean_names()

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss5<-
CONS_C1_df_dup_SEP_2020_match_miss4 %>% 
   dplyr::left_join(dplyr::select(estado_conyugal_2_imputed,amelia_fit_imputations_imp1_row,cat_est_conyugal), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(estado_conyugal_2=factor(dplyr::case_when(is.na(estado_conyugal_2)~as.character(cat_est_conyugal),TRUE~as.character(estado_conyugal_2)))) %>% 
  data.table()


We could not resolve Marital status in 5 cases due to ties in the most frequent values. We looked over possible imputations to region of the center and type of the center (public or private).


# Ver distintos valores propuestos para estado conyugal
#evaluacindelprocesoteraputico_min nombre_region tipo_centro_pub

#no hay información. debemos imputar
no_mostrar=0
if (no_mostrar==1){
tipo_centro_nombre_region_nas_nombre_region<-
CONS_C1_df_dup_SEP_2020 %>% 
    #dplyr::filter(row %in% unlist(unique(CONS_C1_df_dup_SEP_2020_match[,"row"]))) %>% 
    dplyr::filter(is.na(nombre_region)) %>% 
    janitor::tabyl(tipo_centro, nombre_region) 
}

nombre_region_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$nombre_region,
       amelia_fit$imputations$imp2$nombre_region,
       amelia_fit$imputations$imp3$nombre_region,
       amelia_fit$imputations$imp4$nombre_region,
       amelia_fit$imputations$imp5$nombre_region,
       amelia_fit$imputations$imp6$nombre_region,
       amelia_fit$imputations$imp7$nombre_region,
       amelia_fit$imputations$imp8$nombre_region,
       amelia_fit$imputations$imp9$nombre_region,
       amelia_fit$imputations$imp10$nombre_region,
       amelia_fit$imputations$imp11$nombre_region,
       amelia_fit$imputations$imp12$nombre_region,
       amelia_fit$imputations$imp13$nombre_region,
       amelia_fit$imputations$imp14$nombre_region,
       amelia_fit$imputations$imp15$nombre_region,
       amelia_fit$imputations$imp16$nombre_region,
       amelia_fit$imputations$imp17$nombre_region,
       amelia_fit$imputations$imp18$nombre_region,
       amelia_fit$imputations$imp19$nombre_region,
       amelia_fit$imputations$imp20$nombre_region,
       amelia_fit$imputations$imp21$nombre_region,
       amelia_fit$imputations$imp22$nombre_region,
       amelia_fit$imputations$imp23$nombre_region,
       amelia_fit$imputations$imp24$nombre_region,
       amelia_fit$imputations$imp25$nombre_region,
       amelia_fit$imputations$imp26$nombre_region,
       amelia_fit$imputations$imp27$nombre_region,
       amelia_fit$imputations$imp28$nombre_region,
       amelia_fit$imputations$imp29$nombre_region,
       amelia_fit$imputations$imp30$nombre_region
       ) 
nombre_region_imputed<-
nombre_region_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Antofagasta",as.character(.))~1,TRUE~0), .names="reg_02_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Araucan",as.character(.))~1,TRUE~0), .names="reg_09_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Arica",as.character(.))~1,TRUE~0), .names="reg_15_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Atacama",as.character(.))~1,TRUE~0), .names="reg_03_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Ays",as.character(.))~1,TRUE~0), .names="reg_11_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Biob",as.character(.))~1,TRUE~0), .names="reg_08_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Coquimbo",as.character(.))~1,TRUE~0), .names="reg_04_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Los Lagos",as.character(.))~1,TRUE~0), .names="reg_10_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Los R",as.character(.))~1,TRUE~0), .names="reg_14_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Magallanes",as.character(.))~1,TRUE~0), .names="reg_12_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Maule",as.character(.))~1,TRUE~0), .names="reg_07_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Metropolitana",as.character(.))~1,TRUE~0), .names="reg_13_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("uble",as.character(.))~1,TRUE~0), .names="reg_16_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Higgins",as.character(.))~1,TRUE~0), .names="reg_06_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Tarapac",as.character(.))~1,TRUE~0), .names="reg_01_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Valpara",as.character(.))~1,TRUE~0), .names="reg_05_{col}"))%>%
  
 
  dplyr::mutate(nombre_region_02 = base::rowSums(dplyr::select(., starts_with("reg_02_"))))%>%
  dplyr::mutate(nombre_region_09 = base::rowSums(dplyr::select(., starts_with("reg_09_"))))%>%
  dplyr::mutate(nombre_region_15 = base::rowSums(dplyr::select(., starts_with("reg_15_"))))%>%
  dplyr::mutate(nombre_region_03 = base::rowSums(dplyr::select(., starts_with("reg_03_"))))%>%
  dplyr::mutate(nombre_region_11 = base::rowSums(dplyr::select(., starts_with("reg_11_"))))%>%
  dplyr::mutate(nombre_region_08 = base::rowSums(dplyr::select(., starts_with("reg_08_"))))%>%
  dplyr::mutate(nombre_region_04 = base::rowSums(dplyr::select(., starts_with("reg_04_"))))%>%
  dplyr::mutate(nombre_region_10 = base::rowSums(dplyr::select(., starts_with("reg_10_"))))%>%
  dplyr::mutate(nombre_region_14 = base::rowSums(dplyr::select(., starts_with("reg_14_"))))%>%
  dplyr::mutate(nombre_region_12 = base::rowSums(dplyr::select(., starts_with("reg_12_"))))%>%
  dplyr::mutate(nombre_region_07 = base::rowSums(dplyr::select(., starts_with("reg_07_"))))%>%
  dplyr::mutate(nombre_region_13 = base::rowSums(dplyr::select(., starts_with("reg_13_"))))%>%
  dplyr::mutate(nombre_region_16 = base::rowSums(dplyr::select(., starts_with("reg_16_"))))%>%
  dplyr::mutate(nombre_region_06 = base::rowSums(dplyr::select(., starts_with("reg_06_"))))%>%
  dplyr::mutate(nombre_region_01 = base::rowSums(dplyr::select(., starts_with("reg_01_"))))%>%
  dplyr::mutate(nombre_region_05 = base::rowSums(dplyr::select(., starts_with("reg_05_"))))%>%
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_02>0~1,TRUE~0)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_09>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_15>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_03>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_11>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_08>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_04>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_10>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_14>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_12>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_07>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_13>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_16>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_06>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_01>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_05>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  janitor::clean_names()
  
nombre_region_imputed_cat_reg<-  
    nombre_region_imputed %>%
        tidyr::pivot_longer(c(nombre_region_01, nombre_region_02, nombre_region_03, nombre_region_04, nombre_region_05, nombre_region_06, nombre_region_07, nombre_region_08, nombre_region_09, nombre_region_10, nombre_region_11, nombre_region_12, nombre_region_13, nombre_region_14, nombre_region_15), names_to = "cat_nombre_region", values_to = "count") %>%
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(nombre_region_imputed_max=max(count,na.rm=T)) %>% 
        dplyr::ungroup() %>% 
        dplyr::filter(nombre_region_imputed_max==count) %>% 
        dplyr::select(amelia_fit_imputations_imp1_row,cat_nombre_region,count) %>% 
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(n_row=n()) %>% 
        dplyr::ungroup() %>% 
        dplyr::mutate(cat_nombre_region=dplyr::case_when(n_row>1~NA_character_,
                                                        TRUE~cat_nombre_region)) %>% 
        dplyr::distinct(amelia_fit_imputations_imp1_row,.keep_all = T)
  
nombre_region_imputed<-
  nombre_region_imputed %>% 
    dplyr::left_join(nombre_region_imputed_cat_reg, by="amelia_fit_imputations_imp1_row") %>%
    dplyr::mutate(cat_nombre_region=dplyr::case_when(cat_nombre_region=="nombre_region_01"~"TarapacƔ (01)",cat_nombre_region=="nombre_region_02"~"Antofagasta (02)",cat_nombre_region=="nombre_region_03"~"Atacama (03)",cat_nombre_region=="nombre_region_04"~"Coquimbo (04)",cat_nombre_region=="nombre_region_05"~"Valparaƭso (05)",cat_nombre_region=="nombre_region_06"~"O'Higgins (06)",cat_nombre_region=="nombre_region_07"~"Maule (07)",cat_nombre_region=="nombre_region_08"~"Biobƭo (08)",cat_nombre_region=="nombre_region_09"~"Araucanƭa (09)",cat_nombre_region=="nombre_region_10"~"Los Lagos (10)",cat_nombre_region=="nombre_region_11"~"AysƩn (11)",cat_nombre_region=="nombre_region_12"~"Magallanes (12)",cat_nombre_region=="nombre_region_13"~"Metropolitana (13)",
                                                 cat_nombre_region=="nombre_region_14"~"Los Rƭos (14)",cat_nombre_region=="nombre_region_15"~"Arica (15)",cat_nombre_region=="nombre_region_16"~"Ƒuble (16)",
    ))%>% 
  janitor::clean_names()

#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_
tipo_centro_pub_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$tipo_centro_pub,
       amelia_fit$imputations$imp2$tipo_centro_pub,
       amelia_fit$imputations$imp3$tipo_centro_pub,
       amelia_fit$imputations$imp4$tipo_centro_pub,
       amelia_fit$imputations$imp5$tipo_centro_pub,
       amelia_fit$imputations$imp6$tipo_centro_pub,
       amelia_fit$imputations$imp7$tipo_centro_pub,
       amelia_fit$imputations$imp8$tipo_centro_pub,
       amelia_fit$imputations$imp9$tipo_centro_pub,
       amelia_fit$imputations$imp10$tipo_centro_pub,
       amelia_fit$imputations$imp11$tipo_centro_pub,
       amelia_fit$imputations$imp12$tipo_centro_pub,
       amelia_fit$imputations$imp13$tipo_centro_pub,
       amelia_fit$imputations$imp14$tipo_centro_pub,
       amelia_fit$imputations$imp15$tipo_centro_pub,
       amelia_fit$imputations$imp16$tipo_centro_pub,
       amelia_fit$imputations$imp17$tipo_centro_pub,
       amelia_fit$imputations$imp18$tipo_centro_pub,
       amelia_fit$imputations$imp19$tipo_centro_pub,
       amelia_fit$imputations$imp20$tipo_centro_pub,
       amelia_fit$imputations$imp21$tipo_centro_pub,
       amelia_fit$imputations$imp22$tipo_centro_pub,
       amelia_fit$imputations$imp23$tipo_centro_pub,
       amelia_fit$imputations$imp24$tipo_centro_pub,
       amelia_fit$imputations$imp25$tipo_centro_pub,
       amelia_fit$imputations$imp26$tipo_centro_pub,
       amelia_fit$imputations$imp27$tipo_centro_pub,
       amelia_fit$imputations$imp28$tipo_centro_pub,
       amelia_fit$imputations$imp29$tipo_centro_pub,
       amelia_fit$imputations$imp30$tipo_centro_pub
       ) 

tipo_centro_pub_imputed<-
  tipo_centro_pub_imputed %>% 
  dplyr::mutate(tipo_centro_pub = base::rowSums(dplyr::select(., ends_with("tipo_centro_pub"))))%>%
  dplyr::mutate(tipo_centro_pub_to_imputation= dplyr::case_when(tipo_centro_pub>15~1,
                                                  tipo_centro_pub==15~NA_real_,
                                                  TRUE~0)) %>% 
  janitor::clean_names()

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss6<-
CONS_C1_df_dup_SEP_2020_match_miss5 %>% 
   dplyr::left_join(dplyr::select(nombre_region_imputed,amelia_fit_imputations_imp1_row,cat_nombre_region), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(nombre_region=factor(dplyr::case_when(is.na(nombre_region)~as.character(cat_nombre_region),TRUE~as.character(nombre_region)))) %>% 
  dplyr::left_join(dplyr::select(tipo_centro_pub_imputed,amelia_fit_imputations_imp1_row,tipo_centro_pub_to_imputation), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
  dplyr::mutate(tipo_centro_pub=factor(dplyr::case_when(is.na(tipo_centro_pub)~as.logical(tipo_centro_pub_to_imputation),TRUE~as.logical(tipo_centro_pub)))) %>%
  data.table()
#CONS_C1_df_dup_SEP_2020_match_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$nombre_region))


There were impossible to impute region of the center in NA cases due to ties in the different imputed values.


Sample Characteristics

We checked the characteristics of the sample depending on type of treatment (Residential or Outpatients).


#prop.table(table(CONS_C1_df_dup_SEP_2020_match$abandono_temprano_rec,CONS_C1_df_dup_SEP_2020_match$tipo_de_plan_res),2)
match.on.sel<-c("sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","condicion_ocupacional_corr","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region", "prev_treat","sexo_2","edad_al_ing","fech_ing_num")

match.on_tot <- c("row", "hash_key","sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","condicion_ocupacional_corr","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","abandono_temprano_rec","tipo_de_plan_res","evaluacindelprocesoteraputico_min","prev_treat","sexo_2","edad_al_ing","fech_ing_num")

catVars<-
c("sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","condicion_ocupacional_corr","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","tipo_de_plan_res","prev_treat","sexo_2")

#aƱado los imputados
CONS_C1_df_dup_SEP_2020_match_not_miss<-
CONS_C1_df_dup_SEP_2020_match %>% 
  dplyr::select(-sus_ini_mod_mvv,-estado_conyugal_2,-escolaridad_rec,-freq_cons_sus_prin,-edad_ini_cons,-nombre_region,-tipo_centro_pub) %>% 
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020_match_miss6,
                                 row,
                                 sus_ini_mod_mvv,
                                 estado_conyugal_2,
                                 escolaridad_rec,
                                 freq_cons_sus_prin,
                                 nombre_region,
                                 edad_ini_cons),by="row") %>% 
  dplyr::arrange(tipo_de_plan_res,hash_key,rn) %>% 
  dplyr::mutate(condicion_ocupacional_corr=factor(condicion_ocupacional_corr))
CONS_C1_df_dup_SEP_2020_match_not_miss2<-
  CONS_C1_df_dup_SEP_2020_match_not_miss[complete.cases(CONS_C1_df_dup_SEP_2020_match_not_miss[,..match.on_tot]),..match.on_tot] 


Considering that some missing values were not possible to impute, we started having 55,906 cases (users=47,581), and ended having 55,868 complete cases (users=47,558).


kableone <- function(x, ...) {
  capture.output(x <- print(x,...))
  knitr::kable(x,format= "html", format.args= list(decimal.mark= ".", big.mark= ","))
}
#table(is.na(CONS_C1_df_dup_SEP_2020_match_not_miss$edad_ini_cons))

#length(unique(CONS_C1_df_dup_SEP_2020_match$fech_ing_num))
#:#:#:#:#: DISMINUIR LA HETEROGENEIDAD DE LA FECHA DE INGRESO
# FORMAS DE CONSTREƑIR LA VARIABLE:
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-cut(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,100)
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-CONS_C1_df_dup_SEP_2020_match_fech_ing_num
#CONS_C1_df_dup_SEP_2020_match_fech_ing_num<-CONS_C1_df_dup_SEP_2020_match$fech_ing_num
#length(unique(round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,0)))
#length(unique(round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)))

#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)
#:#:#:#:#: 

attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$sus_ini_mod_mvv,"label")<-"Starting Substance"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$estado_conyugal_2,"label")<-"Marital Status"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$escolaridad_rec,"label")<-"Educational Attainment"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$edad_ini_cons,"label")<-"Age of Onset of Drug Use"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$freq_cons_sus_prin,"label")<-"Frequency of use of primary drug"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$nombre_region,"label")<-"Frequency of use of primary drug"

pre_tab1<-Sys.time()
tab1<-
CreateTableOne(vars = match.on.sel, strata = "tipo_de_plan_res", 
                       data = CONS_C1_df_dup_SEP_2020_match_not_miss2, factorVars = catVars, smd=T)
post_tab1<-Sys.time()
diff_time_tab1=post_tab1-pre_tab1

kableone(tab1, 
         caption = paste0("Table 1. Covariate Balance in the Variables of Interest"),
         nonnormal= c("edad_ini_cons","edad_al_ing","fech_ing_num"),#"\\hline",
                       smd=T, test=T, varLabels=T,noSpaces=T, printToggle=T, dropEqual=F) %>% 
    kableExtra::kable_styling(bootstrap_options = c("striped", "hover","condensed"),font_size= 10) %>%
  #()
  row_spec(1, bold = T, italic =T,color ="black",hline_after=T,extra_latex_after="\\arrayrulecolor{white}",font_size= 10) %>%
  #footnote(general = "Here is a general comments of the table. ",
  #        number = c("Footnote 1; ", "Footnote 2; "),
  #         alphabet = c("Footnote A; ", "Footnote B; "),
  #         symbol = c("Footnote Symbol 1; ", "Footnote Symbol 2")
  #         )%>%
  scroll_box(width = "100%", height = "400px") 
FALSE TRUE p test SMD
n 48143 7725
Starting Substance (%) <0.001 0.372
Other 997 (2.1) 186 (2.4)
Marijuana 13834 (28.7) 2993 (38.7)
Alcohol 25883 (53.8) 2802 (36.3)
Cocaine hydrochloride 2176 (4.5) 344 (4.5)
Cocaine paste 5253 (10.9) 1400 (18.1)
Marital Status (%) <0.001 0.309
Married/Shared living arrangements 16936 (35.2) 1713 (22.2)
Separated/Divorced 4905 (10.2) 704 (9.1)
Single 25843 (53.7) 5243 (67.9)
Widower 459 (1.0) 65 (0.8)
Educational Attainment (%) <0.001 0.107
1-More than high school 7126 (14.8) 944 (12.2)
3-Completed primary school or less 13715 (28.5) 2529 (32.7)
2-Completed high school or less 27302 (56.7) 4252 (55.0)
condicion_ocupacional_corr (%) <0.001 1.050
Employed 26034 (54.1) 947 (12.3)
Inactive 4402 (9.1) 742 (9.6)
Looking for a job for the first time 99 (0.2) 7 (0.1)
No activity 1884 (3.9) 1240 (16.1)
Not seeking for work 336 (0.7) 216 (2.8)
Unemployed 15388 (32.0) 4573 (59.2)
Age of Onset of Drug Use (median [IQR]) 15.00 [14.00, 17.17] 15.00 [13.00, 17.00] <0.001 nonnorm 0.069
Frequency of use of primary drug (%) <0.001 0.766
1 day a week or more 3339 (6.9) 163 (2.1)
2 to 3 days a week 14432 (30.0) 765 (9.9)
4 to 6 days a week 8139 (16.9) 941 (12.2)
Daily 19550 (40.6) 5736 (74.3)
Did not use 808 (1.7) 41 (0.5)
Less than 1 day a week 1875 (3.9) 79 (1.0)
Motive of Admission to Treatment (%) <0.001 0.495
Spontaneous 24231 (50.3) 2852 (36.9)
Assisted Referral 3562 (7.4) 1882 (24.4)
Other 2299 (4.8) 419 (5.4)
Justice Sector 4114 (8.5) 502 (6.5)
Health Sector 13937 (28.9) 2070 (26.8)
Diagnóstico CIE-10 (1 o mÔs)(Recodificado)/Psychiatric Diagnoses (ICD-10)(one or more)(Recoded) (%) <0.001 0.387
Without psychiatric comorbidity 18068 (37.5) 1597 (20.7)
Diagnosis unknown (under study) 11728 (24.4) 2689 (34.8)
With psychiatric comorbidity 18347 (38.1) 3439 (44.5)
Frequency of use of primary drug (%) <0.001 0.497
Antofagasta (02) 1261 (2.6) 616 (8.0)
AraucanĆ­a (09) 1045 (2.2) 62 (0.8)
Arica (15) 907 (1.9) 514 (6.7)
Atacama (03) 1113 (2.3) 155 (2.0)
AysƩn (11) 238 (0.5) 11 (0.1)
BiobĆ­o (08) 3103 (6.4) 371 (4.8)
Coquimbo (04) 1841 (3.8) 163 (2.1)
Los Lagos (10) 1413 (2.9) 108 (1.4)
Los RĆ­os (14) 578 (1.2) 114 (1.5)
Magallanes (12) 319 (0.7) 18 (0.2)
Maule (07) 2491 (5.2) 423 (5.5)
Metropolitana (13) 27752 (57.6) 3733 (48.3)
Ƒuble (16) 318 (0.7) 4 (0.1)
O’Higgins (06) 2036 (4.2) 348 (4.5)
TarapacĆ” (01) 731 (1.5) 414 (5.4)
ValparaĆ­so (05) 2997 (6.2) 671 (8.7)
No.Ā of Previous Treatments (%) <0.001 0.345
0 38924 (80.9) 5125 (66.3)
1 6888 (14.3) 1730 (22.4)
2 or more 2331 (4.8) 870 (11.3)
Sex = Women (%) 11176 (23.2) 2688 (34.8) <0.001 0.257
Age at Admission (median [IQR]) 33.39 [27.35, 41.38] 31.40 [25.79, 38.54] <0.001 nonnorm 0.213
Date of Admission to Treatment (Numeric) (median [IQR]) 16521.00 [15763.00, 17231.00] 16237.00 [15492.00, 16988.00] <0.001 nonnorm 0.207
#"tipo_de_plan_ambulatorio",
#https://cran.r-project.org/web/packages/tableone/vignettes/smd.html
#http://rstudio-pubs-static.s3.amazonaws.com/405765_2ce448f9bde24148a5f94c535a34b70e.html
#https://cran.r-project.org/web/packages/tableone/vignettes/introduction.html
#https://cran.r-project.org/web/packages/tableone/tableone.pdf
#https://www.rdocumentation.org/packages/tableone/versions/0.12.0/topics/CreateTableOne

## Construct a table 
#standardized mean differences of greater than 0.1


We checked the similarity in the samples using other measures, such as the variance ratio of the samples and Kolmogorov-Smirnov(KS) statistics.


library(cobalt)
bal2<-bal.tab(CONS_C1_df_dup_SEP_2020_match_not_miss2[,..match.on.sel], treat = CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res,
         thresholds = c(m = .1, v = 2),
         binary = "std", 
         continuous = "std",
         stats = c("mean.diffs", "variance.ratios","ks.statistics"))
## Note: 's.d.denom' not specified; assuming pooled.
#"mean.diffs", "variance.ratios","ks.statistics","ovl.coefficient"

options(knitr.kable.NA = '')

bal2$Balance[,2]<-round(bal2$Balance[,2],2)
bal2$Balance[,4]<-round(bal2$Balance[,4],2)
bal2$Balance[,6]<-round(bal2$Balance[,6],2)

bal2$Balance[,1:6] %>% 
    knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 2. Covariate Balance in the Variables of Interest"),
               col.names = c("Nature of Variables", "Unadjusted SMDs","Threshold","Unadjusted Variance Ratios","Threshold","Unadjusted KS"),
               align =rep('c', 101)) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 10) %>%
  kableExtra::add_footnote( c(paste("Note. ")), 
                            notation = "none") %>%
  kableExtra::scroll_box(width = "100%", height = "375px")
Table 2. Covariate Balance in the Variables of Interest
Nature of Variables Unadjusted SMDs Threshold Unadjusted Variance Ratios Threshold Unadjusted KS
sus_ini_mod_mvv_Other Binary 0.02 Balanced, <0.1 0.00
sus_ini_mod_mvv_Marijuana Binary 0.21 Not Balanced, >0.1 0.10
sus_ini_mod_mvv_Alcohol Binary -0.36 Not Balanced, >0.1 0.17
sus_ini_mod_mvv_Cocaine hydrochloride Binary 0.00 Balanced, <0.1 0.00
sus_ini_mod_mvv_Cocaine paste Binary 0.21 Not Balanced, >0.1 0.07
estado_conyugal_2_Married/Shared living arrangements Binary -0.29 Not Balanced, >0.1 0.13
estado_conyugal_2_Separated/Divorced Binary -0.04 Balanced, <0.1 0.01
estado_conyugal_2_Single Binary 0.29 Not Balanced, >0.1 0.14
estado_conyugal_2_Widower Binary -0.01 Balanced, <0.1 0.00
escolaridad_rec_1-More than high school Binary -0.08 Balanced, <0.1 0.03
escolaridad_rec_3-Completed primary school or less Binary 0.09 Balanced, <0.1 0.04
escolaridad_rec_2-Completed high school or less Binary -0.03 Balanced, <0.1 0.02
condicion_ocupacional_corr_Employed Binary -0.99 Not Balanced, >0.1 0.42
condicion_ocupacional_corr_Inactive Binary 0.02 Balanced, <0.1 0.00
condicion_ocupacional_corr_Looking for a job for the first time Binary -0.03 Balanced, <0.1 0.00
condicion_ocupacional_corr_No activity Binary 0.41 Not Balanced, >0.1 0.12
condicion_ocupacional_corr_Not seeking for work Binary 0.16 Not Balanced, >0.1 0.02
condicion_ocupacional_corr_Unemployed Binary 0.57 Not Balanced, >0.1 0.27
edad_ini_cons Contin. -0.07 Balanced, <0.1 1.00 Balanced, <2 0.06
freq_cons_sus_prin_1 day a week or more Binary -0.23 Not Balanced, >0.1 0.05
freq_cons_sus_prin_2 to 3 days a week Binary -0.52 Not Balanced, >0.1 0.20
freq_cons_sus_prin_4 to 6 days a week Binary -0.13 Not Balanced, >0.1 0.05
freq_cons_sus_prin_Daily Binary 0.72 Not Balanced, >0.1 0.34
freq_cons_sus_prin_Did not use Binary -0.11 Not Balanced, >0.1 0.01
freq_cons_sus_prin_Less than 1 day a week Binary -0.19 Not Balanced, >0.1 0.03
origen_ingreso_mod_Spontaneous Binary -0.27 Not Balanced, >0.1 0.13
origen_ingreso_mod_Assisted Referral Binary 0.48 Not Balanced, >0.1 0.17
origen_ingreso_mod_Other Binary 0.03 Balanced, <0.1 0.01
origen_ingreso_mod_Justice Sector Binary -0.08 Balanced, <0.1 0.02
origen_ingreso_mod_Health Sector Binary -0.05 Balanced, <0.1 0.02
dg_cie_10_rec_Without psychiatric comorbidity Binary -0.38 Not Balanced, >0.1 0.17
dg_cie_10_rec_Diagnosis unknown (under study) Binary 0.23 Not Balanced, >0.1 0.10
dg_cie_10_rec_With psychiatric comorbidity Binary 0.13 Not Balanced, >0.1 0.06
nombre_region_Antofagasta (02) Binary 0.24 Not Balanced, >0.1 0.05
nombre_region_AraucanĆ­a (09) Binary -0.11 Not Balanced, >0.1 0.01
nombre_region_Arica (15) Binary 0.24 Not Balanced, >0.1 0.05
nombre_region_Atacama (03) Binary -0.02 Balanced, <0.1 0.00
nombre_region_AysƩn (11) Binary -0.06 Balanced, <0.1 0.00
nombre_region_BiobĆ­o (08) Binary -0.07 Balanced, <0.1 0.02
nombre_region_Coquimbo (04) Binary -0.10 Not Balanced, >0.1 0.02
nombre_region_Los Lagos (10) Binary -0.11 Not Balanced, >0.1 0.02
nombre_region_Los RĆ­os (14) Binary 0.02 Balanced, <0.1 0.00
nombre_region_Magallanes (12) Binary -0.06 Balanced, <0.1 0.00
nombre_region_Maule (07) Binary 0.01 Balanced, <0.1 0.00
nombre_region_Metropolitana (13) Binary -0.19 Not Balanced, >0.1 0.09
nombre_region_Ƒuble (16) Binary -0.10 Not Balanced, >0.1 0.01
nombre_region_O’Higgins (06) Binary 0.01 Balanced, <0.1 0.00
nombre_region_TarapacĆ” (01) Binary 0.21 Not Balanced, >0.1 0.04
nombre_region_ValparaĆ­so (05) Binary 0.09 Balanced, <0.1 0.02
prev_treat_0 Binary -0.33 Not Balanced, >0.1 0.15
prev_treat_1 Binary 0.21 Not Balanced, >0.1 0.08
prev_treat_2 or more Binary 0.24 Not Balanced, >0.1 0.06
sexo_2_Women Binary 0.26 Not Balanced, >0.1 0.12
edad_al_ing Contin. -0.21 Not Balanced, >0.1 0.87 Balanced, <2 0.08
fech_ing_num Contin. -0.21 Not Balanced, >0.1 0.99 Balanced, <2 0.11
Note.


We generated a plot to focus on unbalanced data.


love.plot(bal2, binary = "std", 
          thresholds = c(m = .1),#, m=.2
          var.order = "unadjusted")+
  theme_bw()+
  ggtitle(NULL)+
  labs(caption="Note. Vertical dashed line= Standardized Differences of .1")+
  theme( plot.caption=element_text(hjust=0))+
   theme(legend.position = "none")
Figure 6. Covariates Balance on Different Values

Figure 6. Covariates Balance on Different Values

#ggplot(idh_pre, aes(idh_pre$quintil_0,idh_pre$idh_0)) + 
#  geom_point(aes(colour = idh_pre$groups), size = 5) + labs(x="Quintiles", y="Percentage")  + 
#  theme_bw() +  
#  scale_fill_manual(values=c("black", "gray60") ) + 
#  scale_colour_manual(name="Groups",values =c("Control"="black", "Exposed"="gray60")) + 
#  theme(legend.key = element_rect(colour = NA)) + 
#  theme(text = element_text(size=20)) + 
#  scale_y_continuous( limits = c(0,0.4), breaks = seq(.05,.4,by=.05))

#bal.tab(bal2, treat = "tipo_de_plan_res",var.name = "fech_ing_num", data = CONS_C1_df_dup_SEP_2020_match_not_miss2)
columna_dummy <- function(df, columna) {
  df %>% 
  mutate_at(columna, ~paste(columna, eval(as.symbol(columna)), sep = "_")) %>% 
    mutate(valor = 1) %>% 
    spread(key = columna, value = valor, fill = 0)
}

match.on<-c("sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","condicion_ocupacional_corr","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","abandono_temprano_rec","tipo_de_plan_res","evaluacindelprocesoteraputico_min","prev_treat","sexo_2","edad_al_ing")

#nrow(CONS_C1_df_dup_SEP_2020_match[complete.cases(CONS_C1_df_dup_SEP_2020_match[,match.on_tot]),match.on_tot]) #101,384

for (i in c(1:length(catVars[-10]))){
  cat<-as.character(catVars[-10][i])
  CONS_C1_df_dup_SEP_2020_match2<-columna_dummy(CONS_C1_df_dup_SEP_2020_match2,cat)
}
match.on.sel2<-names(CONS_C1_df_dup_SEP_2020_match2)[-c(1,2,5)]

#dim(CONS_C1_df_dup_SEP_2020_match[complete.cases(CONS_C1_df_dup_SEP_2020_match[,..match.on_tot]),..match.on_tot]) #94,097
library(MatchingFrontier)
mahal.frontier <- makeFrontier(dataset =CONS_C1_df_dup_SEP_2020_match2,
 treatment = 'tipo_de_plan_res',
 #outcome = 'diff_bet_treat',
 match.on = match.on.sel2)

L1.frontier <- makeFrontier(dataset =CONS_C1_df_dup_SEP_2020_match2,
 treatment = 'tipo_de_plan_res',
 #outcome = 'abandono_temprano_rec',
 match.on = match.on.sel2,
 QOI = 'SATT',
 metric = 'L1',
 ratio = 'fixed')
 
 #Error: In makeFrontier(), missing values in the data; remove them (or impute) and try again.

# By default, makeFrontier() calculates the frontier with the Average Mahalanobis Imbalance. However, as we demonstrate, MatchingFrontier works just as easily with L1 difference.
#The default quantity of interest is the feasible sample average treatment effect on the treated or FSATT

Specification

First, we had to discretize categorical variables into logical parameters, and for countinous covariates, we created quantiles


columna_dummy <- function(df, columna) {
  df %>% 
  mutate_at(columna, ~paste(columna, eval(as.symbol(columna)), sep = "_")) %>% 
    mutate(valor = 1) %>% 
    spread(key = columna, value = valor, fill = 0)
}

quantiles = function(covar, n_q) {
    p_q = seq(0, 1, 1/n_q)
    val_q = quantile(covar, probs = p_q, na.rm = TRUE)
    covar_out = rep(NA, length(covar))
    for (i in 1:n_q) {
        if (i==1) {covar_out[covar<val_q[i+1]] = i}
        if (i>1 & i<n_q) {covar_out[covar>=val_q[i] & covar<val_q[i+1]] = i}
        if (i==n_q) {covar_out[covar>=val_q[i] & covar<=val_q[i+1]] = i}}
    covar_out
}

CONS_C1_df_dup_SEP_2020_match_not_miss3<-CONS_C1_df_dup_SEP_2020_match_not_miss2
for (i in c(1:length(catVars))){#catVars[-10] excluding treatment indicator
  cat<-as.character(catVars[i])#catVars[-10] excluding treatment indicator
  CONS_C1_df_dup_SEP_2020_match_not_miss3<-columna_dummy(CONS_C1_df_dup_SEP_2020_match_not_miss3,cat)
}
CONS_C1_df_dup_SEP_2020_match_not_miss3$tipo_de_plan_res_FALSE<-NULL
CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_ini_cons<-quantiles(CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_ini_cons,20)
CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_al_ing<-quantiles(CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_al_ing,20)
CONS_C1_df_dup_SEP_2020_match_not_miss3$fech_ing_num<-quantiles(CONS_C1_df_dup_SEP_2020_match_not_miss3$fech_ing_num,20)
match.on.sel2<-names(CONS_C1_df_dup_SEP_2020_match_not_miss3)[-c(1,2,5)]
#"edad_ini_cons","edad_al_ing","fech_ing_num")

CONS_SEP_match = CONS_C1_df_dup_SEP_2020_match_not_miss2[order(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res, decreasing = TRUE), ]


Match

library(designmatch)

#fine = list(covs = fine_covs)
#solver = list(name = name, t_max = t_max, approximate = 1, round_cplex = 0, trace_cplex = 0).
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
# 1. Gurobi installation

#For an exact solution, we strongly recommend running designmatch either with CPLEX or Gurobi.  Between these two solvers, the R interface of Gurobi is considerably easier to install.  Here we provide general instructions for manually installing Gurobi and its R interface in Mac and Windows machines.

#1. Create a free academic license
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/creating_a_new_academic_li.html

#2. Install the software
#   2.1. In http://www.gurobi.com/index, go to Downloads > Gurobi Software
#   2.2. Choose your operating system and press download
#
#3. Retrieve and set up your Gurobi license
#   2.1. Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/retrieving_and_setting_up_.html
#   2.2. Then follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/retrieving_a_free_academic.html
#
#4. Test your license
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/testing_your_license.html
#
#5. Install the R interface of Gurobi   
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/r_installing_the_r_package.html
#   * In Windows, in R run the command install.packages("PATH\\gurobi_7.X-Y.zip", repos=NULL) where path leads to the file gurobi_7.X-Y.zip (for example PATH=C:\\gurobi702\\win64\\R; note that the path may be different in your computer), and "7.X-Y" refers to the version you are installing.
#   * In MAC, in R run the command install.packages('PATH/gurobi_7.X-Y.tgz', repos=NULL) where path leads to the file gurobi_7.X-Y.tgz (for example PATH=/Library/gurobi702/mac64/R; note that the path may be different in your computer), and "7.X-Y" refers to the version you are installing.
#       
#6. Test the installation 
#   Load the library and run the examples therein
#   * A possible error that you may get is the following: "Error: package ā€˜slam’ required by ā€˜gurobi’ could not be found". If that case, install.packages('slam') and try again.
#   You should be all set!

#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
require(slam)
# Solver options
#default solver is glpk with approximate = 1
#For an exact solution, we strongly recommend using cplex or gurobi as they are much faster than the other solvers, but they do require a license (free for academics, but not for people outside universities)
t_max = 60*6
solver = "gurobi" #cplex, glpk, gurobi and symphony
solver = list(name = solver, 
t_max = t_max, #t_max is a scalar with the maximum time limit for finding the matches.within this time limit, a partial, suboptimal solution is given
approximate = 1,#. If approximate = 1 (the default), an approximate solution is found via a relaxation of the original integer program
round_cplex = 0, 
trace = 1)#turns the optimizer output on

#Indicador de tratamiento
t_ind= CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res

# Moment balance: constrain differences in means to be at most 0.1 standard deviations apart
#:#:#:#:#:#:#:#:#:#:#:#:#:
#######mom_covs is a matrix where each column is a covariate whose mean is to be balanced
#######mom_tols is a vector of tolerances for the maximum difference in means for the covariates in mom_covs
#######mom_targets is a vector of target moments (e.g., means) of a distribution to be approximated by matched sampling. is optional, but if #######mom_covs is specified then mom_tols needs to be specified too
#######The lengths of mom_tols and mom_target have to be equal to the number of columns of mom_covs
mom_covs = cbind(CONS_SEP_match$edad_al_ing, CONS_SEP_match$fech_ing_num, CONS_SEP_match$edad_ini_cons)
mom_tols = absstddif(mom_covs, t_ind, .05)
mom = list(covs = mom_covs, tols = mom_tols, targets = NULL)

# Mean balance
covs = cbind(CONS_SEP_match$edad_al_ing, CONS_SEP_match$fech_ing_num, CONS_SEP_match$edad_ini_cons)
meantab(covs, t_ind)
##      Mis      Min      Max   Mean T   Mean C Std Dif P-val
## [1,]   0    15.14    86.76    34.79    34.79       0     1
## [2,]   0 13711.00 18193.00 16413.66 16413.66       0     1
## [3,]   0    -2.66    74.00    16.06    16.06       0     1
# Fine balance
#is a matrix where each column is a nominal covariate for fine balance
fine_covs = cbind(CONS_SEP_match$nombre_region, CONS_SEP_match$sus_ini_mod_mvv, CONS_SEP_match$estado_conyugal_2, CONS_SEP_match$escolaridad_rec, CONS_SEP_match$condicion_ocupacional_corr, CONS_SEP_match$freq_cons_sus_prin, CONS_SEP_match$origen_ingreso_mod, CONS_SEP_match$dg_cie_10_rec, CONS_SEP_match$prev_treat, CONS_SEP_match$sexo_2)
fine = list(covs = fine_covs)

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#MATCH
start.time <- Sys.time()
set.seed(2125)
out = cardmatch(t_ind, #ES NECESARIO QUE LOS TRATAMIENTOS ESTEN ORDENADOS Y LOS OTROS VECTORES TAMBIƋN 
                mom = mom,# ya los definĆ­ list(covs = mom_covs, tols = mom_tols, targets = mom_targets), 
          fine = fine, 
          solver = solver)
##   Building the matching problem... 
##   Gurobi optimizer is open... 
##   Finding the optimal matches... 
## Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (win64)
## Thread count: 6 physical cores, 12 logical processors, using up to 12 threads
## Optimize a model with 59 rows, 55868 columns and 893888 nonzeros
## Model fingerprint: 0xdd1c42f4
## Variable types: 0 continuous, 55868 integer (55868 binary)
## Coefficient statistics:
##   Matrix range     [7e-02, 2e+04]
##   Objective range  [1e+00, 1e+00]
##   Bounds range     [0e+00, 0e+00]
##   RHS range        [0e+00, 0e+00]
## Found heuristic solution: objective -0.0000000
## Presolve time: 1.02s
## Presolved: 59 rows, 55868 columns, 893791 nonzeros
## Variable types: 0 continuous, 55868 integer (55868 binary)
## 
## Root relaxation: objective 7.725000e+03, 461 iterations, 0.58 seconds
## 
##     Nodes    |    Current Node    |     Objective Bounds      |     Work
##  Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time
## 
##      0     0 7725.00000    0    5   -0.00000 7725.00000      -     -    4s
## H    0     0                    7725.0000000 7725.00000  0.00%     -    4s
##      0     0 7725.00000    0    5 7725.00000 7725.00000  0.00%     -    4s
## 
## Explored 1 nodes (14617 simplex iterations) in 4.99 seconds
## Thread count was 12 (of 12 available processors)
## 
## Solution count 2: 7725 -0 
## 
## Optimal solution found (tolerance 1.00e-04)
## Best objective 7.725000000000e+03, best bound 7.725000000000e+03, gap 0.0000%
##   Optimal matches found
end.time <- Sys.time()
time.taken <- end.time - start.time
# Fine balance (note here we are getting an approximate solution)
#for (i in 1:ncol(fine_covs)) {     
#   print(finetab(fine_covs[, i], t_id_1, c_id_1))
#}
# Indices of the treated units and matched controls
t_id_1 = out$t_id  
c_id_1 = out$c_id   

paste0("No. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))
## [1] "No. of treatments: 1,261; No. of controls: 14,189"
d_match = CONS_SEP_match[c(t_id_1, c_id_1), ]
#cuidado, el anterior me encontró mÔs del mismo control para un tratado
#por eso ocuparƩ el de mƔs abajo
d_match_no_duplicates = CONS_SEP_match[which(CONS_SEP_match$row %in% c(t_id_1, c_id_1)), ]


Explore Results of the Matching(1)


vars_ecdf<- c('edad_al_ing', 'edad_ini_cons', 'fech_ing_num')
headings <- c("Age at Admission", "Age of Onset of Drug Use", "Date of Admission")
for (i in 1:length(headings)) {
  cat("### ",headings[i],"\n")
  f<-vars_ecdf[i]
  ecdfplot(as.data.frame(CONS_SEP_match)[,f], t_id_1, c_id_1, main_title = "", legend_position = "right")
  cat('\n\n')
}

Age at Admission

Figure 7. Empirical Cumulative Distribution Functions on the Matched Sample

Figure 7. Empirical Cumulative Distribution Functions on the Matched Sample

Age of Onset of Drug Use

Figure 7. Empirical Cumulative Distribution Functions on the Matched Sample

Figure 7. Empirical Cumulative Distribution Functions on the Matched Sample

Date of Admission

Figure 7. Empirical Cumulative Distribution Functions on the Matched Sample

Figure 7. Empirical Cumulative Distribution Functions on the Matched Sample


cat("### ","Love plot","\n")

Love plot

X_mat<-cbind(Region=CONS_SEP_match$nombre_region, 
            "Starting Substance"=CONS_SEP_match$sus_ini_mod_mvv,
            "Marital Status"=CONS_SEP_match$estado_conyugal_2,
            "Educational Attainment"=CONS_SEP_match$escolaridad_rec,
            "Occupational Status"=CONS_SEP_match$condicion_ocupacional_corr,
            "Freq Substance Use"=CONS_SEP_match$freq_cons_sus_prin,
            "Motive of Admission"=CONS_SEP_match$origen_ingreso_mod,
            "Psych. Comorbidity ICD-10"=CONS_SEP_match$dg_cie_10_rec,
            "Previous Treatment"=CONS_SEP_match$prev_treat, 
            "Sex"=CONS_SEP_match$sexo_2, 
            "Age at Admission"=CONS_SEP_match$edad_al_ing, 
            "Age Onset Drug Use"=CONS_SEP_match$edad_ini_cons, 
            "Date of Admission"=CONS_SEP_match$fech_ing_num)

vline = 0.1
loveplot(X_mat=X_mat, t_id=t_id_1, c_id=c_id_1, v_line=vline, legend_position = "topright")
Figure 8. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

Figure 8. Love plot of the Matched Sample in Covariates v/s Unmatched Sample


options(knitr.kable.NA = '')

covs0_unmatch <- subset(CONS_SEP_match, select = -c(row, hash_key, evaluacindelprocesoteraputico_min, abandono_temprano_rec))#subset=,
covs0_match <- subset(d_match_no_duplicates, select = -c(row, hash_key, evaluacindelprocesoteraputico_min, abandono_temprano_rec))#subset=,

#_#_#_#_#_#_generar pareamientos prepost#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
library(cobalt)
bal1_nomatch<-bal.tab(covs0_unmatch, treat = CONS_SEP_match$tipo_de_plan_res,
         thresholds = c(m = .1, v = 2),
         binary = "std", 
         continuous = "std",
         stats = c("mean.diffs", "variance.ratios","ks.statistics"))

bal1_nomatch$Balance[,2]<-round(bal1_nomatch$Balance[,2],2)
bal1_nomatch$Balance[,4]<-round(bal1_nomatch$Balance[,4],2)
bal1_nomatch$Balance[,6]<-round(bal1_nomatch$Balance[,6],2)


bal1_match<-bal.tab(covs0_match, treat = d_match_no_duplicates$tipo_de_plan_res,
         thresholds = c(m = .1, v = 2),
         binary = "std", 
         continuous = "std",
         stats = c("mean.diffs", "variance.ratios","ks.statistics"))

bal1_match$Balance[,2]<-round(bal1_match$Balance[,2],2)
bal1_match$Balance[,4]<-round(bal1_match$Balance[,4],2)
bal1_match$Balance[,6]<-round(bal1_match$Balance[,6],2)


var_names<-c( "Age of Onset of Drug Use", "Age at Admission", "Date of Admission", "Starting Substance- Alcohol", "Starting Substance- Cocaine hydrochloride","Starting Substance- Cocaine paste", "Starting Substance- Marijuana","Starting Substance- Other", "Marital Status- Married/Shared liv. arr.","Marital Status- Separated/Divorced","Marital Status- Single","Marital Status- Widower","Educational Attainment- More than HS", "Educational Attainment- HS or less","Educational Attainment- PS or less","Occupational Status- Employed", "Occupational Status- Inactive","Occupational Status- Looking 1st job","Occupational Status- No activity", "Occupational Status- Not seeking work","Occupational Status- Unemployed","Freq Drug Cons- 1d/wk or more","Freq Drug Cons- 2-3d/wk","Freq Drug Cons- 4-6d/wk","Freq Drug Cons- Daily","Freq Drug Cons- Did not use","Freq Drug Cons- Less 1d/wk","Motive Admission- Assisted Referral", "Motive Admission- Health Sector", "Motive Admission- Justice Sector","Motive Admission- Other","Motive Admission- Spontaneous", "ICD-10 Comorbidity- Dg. Unknown/under study", "ICD-10 Comorbidity- W/Psych Comorbidity", "ICD-10 Comorbidity- Wo/Psych Comorbidity", "Region- Antofagasta(02)", "Region- AraucanĆ­a(09)","Region- Arica(15)","Region- Atacama(03)","Region- AysĆ©n(11)","Region- BiobĆ­o(08)","Region- Coquimbo(04)","Region- Los Lagos(10)","Region- Los RĆ­os(14)","Region- Magallanes(12)","Region- Maule(07)","Region- Metropolitana(13)","Region- Ƒuble(16)","Region- O'Higgins(06)","Region- TarapacĆ”(01)","Region- ValparaĆ­so(05)","Previous Treats.- 0","Previous Treats.- 1","Previous Treats.- 2 or more","Sex- Men")

pre_matched_matched<-cbind.data.frame(var_names,
                 data.table::data.table(bal1_nomatch$Balance[,1:6],keep.rownames = T),
                 data.table::data.table(bal1_match$Balance[,2:6],keep.rownames = F)
                                        ) %>% 
  janitor::clean_names() %>% 
  dplyr::arrange(desc(abs(diff_un)))

pre_matched_matched%>% 
    knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 3. Covariate Balance in the Variables of Interest"),
               col.names = c("Variables","Code","Nature of Variables", "SMDs","Threshold","Variance Ratios","Threshold","KS","SMDs","Threshold","Variance Ratios","Threshold","KS"),
               align =rep('c', 101)) %>%
  add_header_above(c(" "," "," ","Unadjusted" = 5, "Adjusted" = 5)) %>% 
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 9) %>%
  kableExtra::add_footnote( c(paste("Note.",paste0("Unadjusted (n=",dim(covs0_unmatch)[1] %>% format(big.mark=","),")"),";",paste0("Adjusted (n=",dim(covs0_match)[1] %>% format(big.mark=","),")")   )), 
                            notation = "none") %>%
  kableExtra::scroll_box(width = "100%", height = "375px") 
Table 3. Covariate Balance in the Variables of Interest
Unadjusted
Adjusted
Variables Code Nature of Variables SMDs Threshold Variance Ratios Threshold KS SMDs Threshold Variance Ratios Threshold KS
Educational Attainment- More than HS condicion_ocupacional_corr_Employed Binary -0.99 Not Balanced, >0.1 0.42 -0.89 Not Balanced, >0.1 0.38
Freq Drug Cons- 2-3d/wk freq_cons_sus_prin_Daily Binary 0.72 Not Balanced, >0.1 0.34 0.71 Not Balanced, >0.1 0.33
Occupational Status- Looking 1st job condicion_ocupacional_corr_Unemployed Binary 0.57 Not Balanced, >0.1 0.27 0.67 Not Balanced, >0.1 0.32
Occupational Status- Unemployed freq_cons_sus_prin_2 to 3 days a week Binary -0.52 Not Balanced, >0.1 0.20 -0.58 Not Balanced, >0.1 0.22
Freq Drug Cons- Less 1d/wk origen_ingreso_mod_Assisted Referral Binary 0.48 Not Balanced, >0.1 0.17 0.35 Not Balanced, >0.1 0.13
Occupational Status- Employed condicion_ocupacional_corr_No activity Binary 0.41 Not Balanced, >0.1 0.12 0.26 Not Balanced, >0.1 0.05
Motive Admission- Other dg_cie_10_rec_Without psychiatric comorbidity Binary -0.38 Not Balanced, >0.1 0.17 -0.18 Not Balanced, >0.1 0.09
Date of Admission sus_ini_mod_mvv_Alcohol Binary -0.36 Not Balanced, >0.1 0.17 -0.19 Not Balanced, >0.1 0.09
Region- TarapacĆ”(01) prev_treat_0 Binary -0.33 Not Balanced, >0.1 0.15 -0.25 Not Balanced, >0.1 0.08
Starting Substance- Cocaine paste estado_conyugal_2_Married/Shared living arrangements Binary -0.29 Not Balanced, >0.1 0.13 -0.25 Not Balanced, >0.1 0.11
Starting Substance- Other estado_conyugal_2_Single Binary 0.29 Not Balanced, >0.1 0.14 0.23 Not Balanced, >0.1 0.11
Freq Drug Cons- Did not use origen_ingreso_mod_Spontaneous Binary -0.27 Not Balanced, >0.1 0.13 -0.20 Not Balanced, >0.1 0.10
Previous Treats.- 1 sexo_2_Women Binary 0.26 Not Balanced, >0.1 0.12 0.17 Not Balanced, >0.1 0.08
ICD-10 Comorbidity- W/Psych Comorbidity nombre_region_Antofagasta (02) Binary 0.24 Not Balanced, >0.1 0.05 0.13 Not Balanced, >0.1 0.03
Region- Antofagasta(02) nombre_region_Arica (15) Binary 0.24 Not Balanced, >0.1 0.05 0.25 Not Balanced, >0.1 0.05
Previous Treats.- 0 prev_treat_2 or more Binary 0.24 Not Balanced, >0.1 0.06 0.11 Not Balanced, >0.1 0.02
Occupational Status- Not seeking work freq_cons_sus_prin_1 day a week or more Binary -0.23 Not Balanced, >0.1 0.05 -0.24 Not Balanced, >0.1 0.05
Motive Admission- Spontaneous dg_cie_10_rec_Diagnosis unknown (under study) Binary 0.23 Not Balanced, >0.1 0.10 0.04 Balanced, <0.1 0.02
Age at Admission sus_ini_mod_mvv_Marijuana Binary 0.21 Not Balanced, >0.1 0.10 0.17 Not Balanced, >0.1 0.07
Starting Substance- Cocaine hydrochloride sus_ini_mod_mvv_Cocaine paste Binary 0.21 Not Balanced, >0.1 0.07 0.08 Balanced, <0.1 0.04
Region- Ƒuble(16) nombre_region_TarapacĆ” (01) Binary 0.21 Not Balanced, >0.1 0.04 0.23 Not Balanced, >0.1 0.04
Region- ValparaĆ­so(05) prev_treat_1 Binary 0.21 Not Balanced, >0.1 0.08 0.22 Not Balanced, >0.1 0.06
Previous Treats.- 2 or more edad_al_ing Contin. -0.21 Not Balanced, >0.1 0.87 Balanced, <2 0.08 -0.12 Not Balanced, >0.1 1.08 Balanced, <2 0.08
Sex- Men fech_ing_num Contin. -0.21 Not Balanced, >0.1 0.99 Balanced, <2 0.11 0.00 Balanced, <0.1 0.89 Balanced, <2 0.04
Freq Drug Cons- Daily freq_cons_sus_prin_Less than 1 day a week Binary -0.19 Not Balanced, >0.1 0.03 -0.22 Not Balanced, >0.1 0.03
Region- Magallanes(12) nombre_region_Metropolitana (13) Binary -0.19 Not Balanced, >0.1 0.09 -0.15 Not Balanced, >0.1 0.07
Occupational Status- Inactive condicion_ocupacional_corr_Not seeking for work Binary 0.16 Not Balanced, >0.1 0.02 0.12 Not Balanced, >0.1 0.02
Freq Drug Cons- 1d/wk or more freq_cons_sus_prin_4 to 6 days a week Binary -0.13 Not Balanced, >0.1 0.05 -0.03 Balanced, <0.1 0.01
ICD-10 Comorbidity- Dg. Unknown/under study dg_cie_10_rec_With psychiatric comorbidity Binary 0.13 Not Balanced, >0.1 0.06 0.13 Not Balanced, >0.1 0.07
Freq Drug Cons- 4-6d/wk freq_cons_sus_prin_Did not use Binary -0.11 Not Balanced, >0.1 0.01 -0.15 Not Balanced, >0.1 0.01
ICD-10 Comorbidity- Wo/Psych Comorbidity nombre_region_AraucanĆ­a (09) Binary -0.11 Not Balanced, >0.1 0.01 -0.10 Balanced, <0.1 0.01
Region- BiobĆ­o(08) nombre_region_Los Lagos (10) Binary -0.11 Not Balanced, >0.1 0.02 -0.13 Not Balanced, >0.1 0.01
Region- AysƩn(11) nombre_region_Coquimbo (04) Binary -0.10 Not Balanced, >0.1 0.02 -0.12 Not Balanced, >0.1 0.02
Region- Maule(07) nombre_region_Ƒuble (16) Binary -0.10 Not Balanced, >0.1 0.01 -0.14 Not Balanced, >0.1 0.01
Marital Status- Single escolaridad_rec_3-Completed primary school or less Binary 0.09 Balanced, <0.1 0.04 0.06 Balanced, <0.1 0.03
Region- O’Higgins(06) nombre_region_ValparaĆ­so (05) Binary 0.09 Balanced, <0.1 0.02 0.12 Not Balanced, >0.1 0.04
Marital Status- Separated/Divorced escolaridad_rec_1-More than high school Binary -0.08 Balanced, <0.1 0.03 -0.09 Balanced, <0.1 0.03
Motive Admission- Health Sector origen_ingreso_mod_Justice Sector Binary -0.08 Balanced, <0.1 0.02 0.03 Balanced, <0.1 0.01
Occupational Status- No activity edad_ini_cons Contin. -0.07 Balanced, <0.1 1.00 Balanced, <2 0.06 -0.02 Balanced, <0.1 1.02 Balanced, <2 0.05
Region- Atacama(03) nombre_region_BiobĆ­o (08) Binary -0.07 Balanced, <0.1 0.02 -0.03 Balanced, <0.1 0.01
Region- Arica(15) nombre_region_AysƩn (11) Binary -0.06 Balanced, <0.1 0.00 -0.04 Balanced, <0.1 0.00
Region- Los Lagos(10) nombre_region_Magallanes (12) Binary -0.06 Balanced, <0.1 0.00 -0.13 Not Balanced, >0.1 0.01
Motive Admission- Justice Sector origen_ingreso_mod_Health Sector Binary -0.05 Balanced, <0.1 0.02 -0.06 Balanced, <0.1 0.03
Starting Substance- Marijuana estado_conyugal_2_Separated/Divorced Binary -0.04 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Marital Status- Widower escolaridad_rec_2-Completed high school or less Binary -0.03 Balanced, <0.1 0.02 0.01 Balanced, <0.1 0.00
Educational Attainment- PS or less condicion_ocupacional_corr_Looking for a job for the first time Binary -0.03 Balanced, <0.1 0.00 -0.04 Balanced, <0.1 0.00
Motive Admission- Assisted Referral origen_ingreso_mod_Other Binary 0.03 Balanced, <0.1 0.01 -0.05 Balanced, <0.1 0.01
Age of Onset of Drug Use sus_ini_mod_mvv_Other Binary 0.02 Balanced, <0.1 0.00 -0.06 Balanced, <0.1 0.01
Educational Attainment- HS or less condicion_ocupacional_corr_Inactive Binary 0.02 Balanced, <0.1 0.00 0.01 Balanced, <0.1 0.00
Region- AraucanĆ­a(09) nombre_region_Atacama (03) Binary -0.02 Balanced, <0.1 0.00 -0.01 Balanced, <0.1 0.00
Region- Coquimbo(04) nombre_region_Los RĆ­os (14) Binary 0.02 Balanced, <0.1 0.00 -0.01 Balanced, <0.1 0.00
Marital Status- Married/Shared liv. arr. estado_conyugal_2_Widower Binary -0.01 Balanced, <0.1 0.00 0.01 Balanced, <0.1 0.00
Region- Los RĆ­os(14) nombre_region_Maule (07) Binary 0.01 Balanced, <0.1 0.00 0.03 Balanced, <0.1 0.00
Region- Metropolitana(13) nombre_region_O’Higgins (06) Binary 0.01 Balanced, <0.1 0.00 -0.05 Balanced, <0.1 0.01
Starting Substance- Alcohol sus_ini_mod_mvv_Cocaine hydrochloride Binary 0.00 Balanced, <0.1 0.00 -0.09 Balanced, <0.1 0.02
Note. Unadjusted (n=55,868) ; Adjusted (n=6,189)


t_max = 60*6
solver = "gurobi" #cplex, glpk, gurobi and symphony
solver2 = list(name = solver, 
t_max = t_max, #t_max is a scalar with the maximum time limit for finding the matches.within this time limit, a partial, suboptimal solution is given
approximate = 0,#. If approximate = 1 (the default), an approximate solution is found via a relaxation of the original integer program
round_cplex = 0, 
trace = 1)#turns the optimizer output on

# Moment balance: constrain differences in means to be at most 0.1 standard deviations apart
#:#:#:#:#:#:#:#:#:#:#:#:#:

mom_covs2 = cbind(CONS_SEP_match$edad_al_ing, CONS_SEP_match$fech_ing_num, CONS_SEP_match$edad_ini_cons)
mom_tols2 = absstddif(mom_covs, t_ind, .03)
mom2 = list(covs = mom_covs2, tols = mom_tols2, targets = NULL)

# Mean balance
covs2 = cbind(CONS_SEP_match$edad_al_ing, CONS_SEP_match$fech_ing_num, CONS_SEP_match$edad_ini_cons)
meantab(covs2, t_ind)
##      Mis      Min      Max   Mean T   Mean C Std Dif P-val
## [1,]   0    15.14    86.76    34.79    34.79       0     1
## [2,]   0 13711.00 18193.00 16413.66 16413.66       0     1
## [3,]   0    -2.66    74.00    16.06    16.06       0     1
# Fine balance
#is a matrix where each column is a nominal covariate for fine balance
fine_covs2 = cbind(CONS_SEP_match$nombre_region, CONS_SEP_match$sus_ini_mod_mvv, CONS_SEP_match$estado_conyugal_2, CONS_SEP_match$escolaridad_rec, CONS_SEP_match$condicion_ocupacional_corr, CONS_SEP_match$freq_cons_sus_prin, CONS_SEP_match$origen_ingreso_mod, CONS_SEP_match$dg_cie_10_rec, CONS_SEP_match$prev_treat, CONS_SEP_match$sexo_2)
fine2 = list(covs = fine_covs2)

#MATCH
start.time2 <- Sys.time()
set.seed(2125)
out2 = cardmatch(t_ind, #ES NECESARIO QUE LOS TRATAMIENTOS ESTEN ORDENADOS Y LOS OTROS VECTORES TAMBIƋN 
                mom = mom,# ya los definĆ­ list(covs = mom_covs, tols = mom_tols, targets = mom_targets), 
          fine = fine2, 
          solver = solver2)
##   Building the matching problem... 
##   Gurobi optimizer is open... 
##   Finding the optimal matches... 
## Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (win64)
## Thread count: 6 physical cores, 12 logical processors, using up to 12 threads
## Optimize a model with 59 rows, 55868 columns and 893888 nonzeros
## Model fingerprint: 0xdd1c42f4
## Variable types: 0 continuous, 55868 integer (55868 binary)
## Coefficient statistics:
##   Matrix range     [7e-02, 2e+04]
##   Objective range  [1e+00, 1e+00]
##   Bounds range     [0e+00, 0e+00]
##   RHS range        [0e+00, 0e+00]
## Found heuristic solution: objective -0.0000000
## Presolve time: 1.00s
## Presolved: 59 rows, 55868 columns, 893791 nonzeros
## Variable types: 0 continuous, 55868 integer (55868 binary)
## 
## Root relaxation: objective 7.725000e+03, 461 iterations, 0.57 seconds
## 
##     Nodes    |    Current Node    |     Objective Bounds      |     Work
##  Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time
## 
##      0     0 7725.00000    0    5   -0.00000 7725.00000      -     -    4s
## H    0     0                    7725.0000000 7725.00000  0.00%     -    4s
##      0     0 7725.00000    0    5 7725.00000 7725.00000  0.00%     -    4s
## 
## Explored 1 nodes (14617 simplex iterations) in 4.95 seconds
## Thread count was 12 (of 12 available processors)
## 
## Solution count 2: 7725 -0 
## 
## Optimal solution found (tolerance 1.00e-04)
## Best objective 7.725000000000e+03, best bound 7.725000000000e+03, gap 0.0000%
##   Optimal matches found
end.time2 <- Sys.time()
time.taken2 <- end.time2 - start.time2

t_id_2 = out2$t_id  
c_id_2 = out2$c_id  

paste0("No. of treatments: ",table(table(t_id_2)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_2))%>% formatC(big.mark = ","))
## [1] "No. of treatments: 1,261; No. of controls: 14,189"
d_match2 = CONS_SEP_match[c(t_id_2, c_id_2), ]
d_match2_no_duplicates = CONS_SEP_match[which(CONS_SEP_match$row %in% c(t_id_2, c_id_2)), ]


Explore Results of the Matching(2)


vars_ecdf<- c('edad_al_ing', 'edad_ini_cons', 'fech_ing_num')
headings <- c("Age at Admission", "Age of Onset of Drug Use", "Date of Admission")
for (i in 1:length(headings)) {
  cat("### ",headings[i],"\n")
  f<-vars_ecdf[i]
  df<-as.data.frame(CONS_SEP_match)[,f]
  ecdfplot(df, t_id_2, c_id_2, main_title = "", legend_position = "right")
  cat('\n\n')
}

Age at Admission

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Age of Onset of Drug Use

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Date of Admission

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample


cat("### ","Love plot","\n")

Love plot

X_mat<-cbind(Region=CONS_SEP_match$nombre_region, 
            "Starting Substance"=CONS_SEP_match$sus_ini_mod_mvv,
            "Marital Status"=CONS_SEP_match$estado_conyugal_2,
            "Educational Attainment"=CONS_SEP_match$escolaridad_rec,
            "Occupational Status"=CONS_SEP_match$condicion_ocupacional_corr,
            "Freq Substance Use"=CONS_SEP_match$freq_cons_sus_prin,
            "Motive of Admission"=CONS_SEP_match$origen_ingreso_mod,
            "Psych. Comorbidity ICD-10"=CONS_SEP_match$dg_cie_10_rec,
            "Previous Treatment"=CONS_SEP_match$prev_treat, 
            "Sex"=CONS_SEP_match$sexo_2, 
            "Age at Admission"=CONS_SEP_match$edad_al_ing, 
            "Age Onset Drug Use"=CONS_SEP_match$edad_ini_cons, 
            "Date of Admission"=CONS_SEP_match$fech_ing_num)

vline = 0.1
loveplot(X_mat=X_mat, t_id=t_id_2, c_id=c_id_2, v_line=vline, legend_position = "topright")
Figure 10. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

Figure 10. Love plot of the Matched Sample in Covariates v/s Unmatched Sample



options(knitr.kable.NA = '')

covs0_unmatch <- subset(CONS_SEP_match, select = -c(row, hash_key, evaluacindelprocesoteraputico_min, abandono_temprano_rec))#subset=,
covs0_match <- subset(d_match2_no_duplicates, select = -c(row, hash_key, evaluacindelprocesoteraputico_min, abandono_temprano_rec))#subset=,

#_#_#_#_#_#_generar pareamientos prepost#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
library(cobalt)
bal2_nomatch<-bal.tab(covs0_unmatch, treat = CONS_SEP_match$tipo_de_plan_res,
         thresholds = c(m = .1, v = 2),
         binary = "std", 
         continuous = "std",
         stats = c("mean.diffs", "variance.ratios","ks.statistics"))

bal2_nomatch$Balance[,2]<-round(bal2_nomatch$Balance[,2],2)
bal2_nomatch$Balance[,4]<-round(bal2_nomatch$Balance[,4],2)
bal2_nomatch$Balance[,6]<-round(bal2_nomatch$Balance[,6],2)

bal2_match<-bal.tab(covs0_match, treat = d_match2_no_duplicates$tipo_de_plan_res,
         thresholds = c(m = .1, v = 2),
         binary = "std", 
         continuous = "std",
         stats = c("mean.diffs", "variance.ratios","ks.statistics"))

bal2_match$Balance[,2]<-round(bal2_match$Balance[,2],2)
bal2_match$Balance[,4]<-round(bal2_match$Balance[,4],2)
bal2_match$Balance[,6]<-round(bal2_match$Balance[,6],2)

var_names<-c( "Age of Onset of Drug Use", "Age at Admission", "Date of Admission", "Starting Substance- Alcohol", "Starting Substance- Cocaine hydrochloride","Starting Substance- Cocaine paste", "Starting Substance- Marijuana","Starting Substance- Other", "Marital Status- Married/Shared liv. arr.","Marital Status- Separated/Divorced","Marital Status- Single","Marital Status- Widower","Educational Attainment- More than HS", "Educational Attainment- HS or less","Educational Attainment- PS or less","Occupational Status- Employed", "Occupational Status- Inactive","Occupational Status- Looking 1st job","Occupational Status- No activity", "Occupational Status- Not seeking work","Occupational Status- Unemployed","Freq Drug Cons- 1d/wk or more","Freq Drug Cons- 2-3d/wk","Freq Drug Cons- 4-6d/wk","Freq Drug Cons- Daily","Freq Drug Cons- Did not use","Freq Drug Cons- Less 1d/wk","Motive Admission- Assisted Referral", "Motive Admission- Health Sector", "Motive Admission- Justice Sector","Motive Admission- Other","Motive Admission- Spontaneous", "ICD-10 Comorbidity- Dg. Unknown/under study", "ICD-10 Comorbidity- W/Psych Comorbidity", "ICD-10 Comorbidity- Wo/Psych Comorbidity", "Region- Antofagasta(02)", "Region- AraucanĆ­a(09)","Region- Arica(15)","Region- Atacama(03)","Region- AysĆ©n(11)","Region- BiobĆ­o(08)","Region- Coquimbo(04)","Region- Los Lagos(10)","Region- Los RĆ­os(14)","Region- Magallanes(12)","Region- Maule(07)","Region- Metropolitana(13)","Region- Ƒuble(16)","Region- O'Higgins(06)","Region- TarapacĆ”(01)","Region- ValparaĆ­so(05)","Previous Treats.- 0","Previous Treats.- 1","Previous Treats.- 2 or more","Sex- Men")

pre_matched_matched<-cbind.data.frame(var_names,
                 data.table::data.table(bal2_nomatch$Balance[,1:6],keep.rownames = T),
                 data.table::data.table(bal2_match$Balance[,2:6],keep.rownames = F)
                                        ) %>% 
  janitor::clean_names() %>% 
  dplyr::arrange(desc(abs(diff_un)))

pre_matched_matched%>% 
    knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 4. Covariate Balance in the Variables of Interest"),
               col.names = c("Variables","Code","Nature of Variables", "SMDs","Threshold","Variance Ratios","Threshold","KS","SMDs","Threshold","Variance Ratios","Threshold","KS"),
               align =rep('c', 101)) %>%
  add_header_above(c(" "," "," ","Unadjusted" = 5, "Adjusted" = 5)) %>% 
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 9) %>%
  kableExtra::add_footnote( c(paste("Note.",paste0("Unadjusted (n=",dim(covs0_unmatch)[1] %>% format(big.mark=","),")"),";",paste0("Adjusted (n=",dim(covs0_match)[1] %>% format(big.mark=","),")")   )), 
                            notation = "none") %>%
  kableExtra::scroll_box(width = "100%", height = "375px") 
Table 4. Covariate Balance in the Variables of Interest
Unadjusted
Adjusted
Variables Code Nature of Variables SMDs Threshold Variance Ratios Threshold KS SMDs Threshold Variance Ratios Threshold KS
Educational Attainment- More than HS condicion_ocupacional_corr_Employed Binary -0.99 Not Balanced, >0.1 0.42 -0.89 Not Balanced, >0.1 0.38
Freq Drug Cons- 2-3d/wk freq_cons_sus_prin_Daily Binary 0.72 Not Balanced, >0.1 0.34 0.71 Not Balanced, >0.1 0.33
Occupational Status- Looking 1st job condicion_ocupacional_corr_Unemployed Binary 0.57 Not Balanced, >0.1 0.27 0.67 Not Balanced, >0.1 0.32
Occupational Status- Unemployed freq_cons_sus_prin_2 to 3 days a week Binary -0.52 Not Balanced, >0.1 0.20 -0.58 Not Balanced, >0.1 0.22
Freq Drug Cons- Less 1d/wk origen_ingreso_mod_Assisted Referral Binary 0.48 Not Balanced, >0.1 0.17 0.35 Not Balanced, >0.1 0.13
Occupational Status- Employed condicion_ocupacional_corr_No activity Binary 0.41 Not Balanced, >0.1 0.12 0.26 Not Balanced, >0.1 0.05
Motive Admission- Other dg_cie_10_rec_Without psychiatric comorbidity Binary -0.38 Not Balanced, >0.1 0.17 -0.18 Not Balanced, >0.1 0.09
Date of Admission sus_ini_mod_mvv_Alcohol Binary -0.36 Not Balanced, >0.1 0.17 -0.19 Not Balanced, >0.1 0.09
Region- TarapacĆ”(01) prev_treat_0 Binary -0.33 Not Balanced, >0.1 0.15 -0.25 Not Balanced, >0.1 0.08
Starting Substance- Cocaine paste estado_conyugal_2_Married/Shared living arrangements Binary -0.29 Not Balanced, >0.1 0.13 -0.25 Not Balanced, >0.1 0.11
Starting Substance- Other estado_conyugal_2_Single Binary 0.29 Not Balanced, >0.1 0.14 0.23 Not Balanced, >0.1 0.11
Freq Drug Cons- Did not use origen_ingreso_mod_Spontaneous Binary -0.27 Not Balanced, >0.1 0.13 -0.20 Not Balanced, >0.1 0.10
Previous Treats.- 1 sexo_2_Women Binary 0.26 Not Balanced, >0.1 0.12 0.17 Not Balanced, >0.1 0.08
ICD-10 Comorbidity- W/Psych Comorbidity nombre_region_Antofagasta (02) Binary 0.24 Not Balanced, >0.1 0.05 0.13 Not Balanced, >0.1 0.03
Region- Antofagasta(02) nombre_region_Arica (15) Binary 0.24 Not Balanced, >0.1 0.05 0.25 Not Balanced, >0.1 0.05
Previous Treats.- 0 prev_treat_2 or more Binary 0.24 Not Balanced, >0.1 0.06 0.11 Not Balanced, >0.1 0.02
Occupational Status- Not seeking work freq_cons_sus_prin_1 day a week or more Binary -0.23 Not Balanced, >0.1 0.05 -0.24 Not Balanced, >0.1 0.05
Motive Admission- Spontaneous dg_cie_10_rec_Diagnosis unknown (under study) Binary 0.23 Not Balanced, >0.1 0.10 0.04 Balanced, <0.1 0.02
Age at Admission sus_ini_mod_mvv_Marijuana Binary 0.21 Not Balanced, >0.1 0.10 0.17 Not Balanced, >0.1 0.07
Starting Substance- Cocaine hydrochloride sus_ini_mod_mvv_Cocaine paste Binary 0.21 Not Balanced, >0.1 0.07 0.08 Balanced, <0.1 0.04
Region- Ƒuble(16) nombre_region_TarapacĆ” (01) Binary 0.21 Not Balanced, >0.1 0.04 0.23 Not Balanced, >0.1 0.04
Region- ValparaĆ­so(05) prev_treat_1 Binary 0.21 Not Balanced, >0.1 0.08 0.22 Not Balanced, >0.1 0.06
Previous Treats.- 2 or more edad_al_ing Contin. -0.21 Not Balanced, >0.1 0.87 Balanced, <2 0.08 -0.12 Not Balanced, >0.1 1.08 Balanced, <2 0.08
Sex- Men fech_ing_num Contin. -0.21 Not Balanced, >0.1 0.99 Balanced, <2 0.11 0.00 Balanced, <0.1 0.89 Balanced, <2 0.04
Freq Drug Cons- Daily freq_cons_sus_prin_Less than 1 day a week Binary -0.19 Not Balanced, >0.1 0.03 -0.22 Not Balanced, >0.1 0.03
Region- Magallanes(12) nombre_region_Metropolitana (13) Binary -0.19 Not Balanced, >0.1 0.09 -0.15 Not Balanced, >0.1 0.07
Occupational Status- Inactive condicion_ocupacional_corr_Not seeking for work Binary 0.16 Not Balanced, >0.1 0.02 0.12 Not Balanced, >0.1 0.02
Freq Drug Cons- 1d/wk or more freq_cons_sus_prin_4 to 6 days a week Binary -0.13 Not Balanced, >0.1 0.05 -0.03 Balanced, <0.1 0.01
ICD-10 Comorbidity- Dg. Unknown/under study dg_cie_10_rec_With psychiatric comorbidity Binary 0.13 Not Balanced, >0.1 0.06 0.13 Not Balanced, >0.1 0.07
Freq Drug Cons- 4-6d/wk freq_cons_sus_prin_Did not use Binary -0.11 Not Balanced, >0.1 0.01 -0.15 Not Balanced, >0.1 0.01
ICD-10 Comorbidity- Wo/Psych Comorbidity nombre_region_AraucanĆ­a (09) Binary -0.11 Not Balanced, >0.1 0.01 -0.10 Balanced, <0.1 0.01
Region- BiobĆ­o(08) nombre_region_Los Lagos (10) Binary -0.11 Not Balanced, >0.1 0.02 -0.13 Not Balanced, >0.1 0.01
Region- AysƩn(11) nombre_region_Coquimbo (04) Binary -0.10 Not Balanced, >0.1 0.02 -0.12 Not Balanced, >0.1 0.02
Region- Maule(07) nombre_region_Ƒuble (16) Binary -0.10 Not Balanced, >0.1 0.01 -0.14 Not Balanced, >0.1 0.01
Marital Status- Single escolaridad_rec_3-Completed primary school or less Binary 0.09 Balanced, <0.1 0.04 0.06 Balanced, <0.1 0.03
Region- O’Higgins(06) nombre_region_ValparaĆ­so (05) Binary 0.09 Balanced, <0.1 0.02 0.12 Not Balanced, >0.1 0.04
Marital Status- Separated/Divorced escolaridad_rec_1-More than high school Binary -0.08 Balanced, <0.1 0.03 -0.09 Balanced, <0.1 0.03
Motive Admission- Health Sector origen_ingreso_mod_Justice Sector Binary -0.08 Balanced, <0.1 0.02 0.03 Balanced, <0.1 0.01
Occupational Status- No activity edad_ini_cons Contin. -0.07 Balanced, <0.1 1.00 Balanced, <2 0.06 -0.02 Balanced, <0.1 1.02 Balanced, <2 0.05
Region- Atacama(03) nombre_region_BiobĆ­o (08) Binary -0.07 Balanced, <0.1 0.02 -0.03 Balanced, <0.1 0.01
Region- Arica(15) nombre_region_AysƩn (11) Binary -0.06 Balanced, <0.1 0.00 -0.04 Balanced, <0.1 0.00
Region- Los Lagos(10) nombre_region_Magallanes (12) Binary -0.06 Balanced, <0.1 0.00 -0.13 Not Balanced, >0.1 0.01
Motive Admission- Justice Sector origen_ingreso_mod_Health Sector Binary -0.05 Balanced, <0.1 0.02 -0.06 Balanced, <0.1 0.03
Starting Substance- Marijuana estado_conyugal_2_Separated/Divorced Binary -0.04 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Marital Status- Widower escolaridad_rec_2-Completed high school or less Binary -0.03 Balanced, <0.1 0.02 0.01 Balanced, <0.1 0.00
Educational Attainment- PS or less condicion_ocupacional_corr_Looking for a job for the first time Binary -0.03 Balanced, <0.1 0.00 -0.04 Balanced, <0.1 0.00
Motive Admission- Assisted Referral origen_ingreso_mod_Other Binary 0.03 Balanced, <0.1 0.01 -0.05 Balanced, <0.1 0.01
Age of Onset of Drug Use sus_ini_mod_mvv_Other Binary 0.02 Balanced, <0.1 0.00 -0.06 Balanced, <0.1 0.01
Educational Attainment- HS or less condicion_ocupacional_corr_Inactive Binary 0.02 Balanced, <0.1 0.00 0.01 Balanced, <0.1 0.00
Region- AraucanĆ­a(09) nombre_region_Atacama (03) Binary -0.02 Balanced, <0.1 0.00 -0.01 Balanced, <0.1 0.00
Region- Coquimbo(04) nombre_region_Los RĆ­os (14) Binary 0.02 Balanced, <0.1 0.00 -0.01 Balanced, <0.1 0.00
Marital Status- Married/Shared liv. arr. estado_conyugal_2_Widower Binary -0.01 Balanced, <0.1 0.00 0.01 Balanced, <0.1 0.00
Region- Los RĆ­os(14) nombre_region_Maule (07) Binary 0.01 Balanced, <0.1 0.00 0.03 Balanced, <0.1 0.00
Region- Metropolitana(13) nombre_region_O’Higgins (06) Binary 0.01 Balanced, <0.1 0.00 -0.05 Balanced, <0.1 0.01
Starting Substance- Alcohol sus_ini_mod_mvv_Cocaine hydrochloride Binary 0.00 Balanced, <0.1 0.00 -0.09 Balanced, <0.1 0.02
Note. Unadjusted (n=55,868) ; Adjusted (n=6,189)

Moment Balance


Covariates are discretized to be binary and have only two categories.


CONS_SEP_match2= CONS_C1_df_dup_SEP_2020_match_not_miss3[order(CONS_C1_df_dup_SEP_2020_match_not_miss3$tipo_de_plan_res, decreasing = TRUE), ]

t_max = 60*6
solver = "gurobi" #cplex, glpk, gurobi and symphony
solver3 = list(name = solver, 
t_max = t_max, #t_max is a scalar with the maximum time limit for finding the matches.within this time limit, a partial, suboptimal solution is given
approximate = 0,#. If approximate = 1 (the default), an approximate solution is found via a relaxation of the original integer program
round_cplex = 1,  #ound_cplex = 1 ensures that the solution found is integral by rounding and all the constraints are exactly statisfied
trace = 1)#turns the optimizer output on

# Moment balance: constrain differences in means to be at most 0.1 standard deviations apart
#:#:#:#:#:#:#:#:#:#:#:#:#:

mom_covs3 = cbind(CONS_SEP_match2$edad_ini_cons, 
                   CONS_SEP_match2$edad_al_ing, 
                   CONS_SEP_match2$fech_ing_num, 
                   CONS_SEP_match2$sus_ini_mod_mvv_Alcohol, 
                   CONS_SEP_match2$`sus_ini_mod_mvv_Cocaine hydrochloride`, 
                   CONS_SEP_match2$`sus_ini_mod_mvv_Cocaine paste`, 
                   CONS_SEP_match2$`sus_ini_mod_mvv_Marijuana`, 
                   CONS_SEP_match2$sus_ini_mod_mvv_Other, 
                   CONS_SEP_match2$`estado_conyugal_2_Married/Shared living arrangements`, 
                   CONS_SEP_match2$`estado_conyugal_2_Separated/Divorced`,
                   CONS_SEP_match2$estado_conyugal_2_Single, 
                   CONS_SEP_match2$estado_conyugal_2_Widower,
                   CONS_SEP_match2$`escolaridad_rec_1-More than high school`,
                   CONS_SEP_match2$`escolaridad_rec_2-Completed high school or less`,
                   CONS_SEP_match2$`escolaridad_rec_3-Completed primary school or less`,
                   CONS_SEP_match2$condicion_ocupacional_corr_Employed,
                   CONS_SEP_match2$condicion_ocupacional_corr_Inactive,
                   CONS_SEP_match2$`condicion_ocupacional_corr_Looking for a job for the first time`,
                   CONS_SEP_match2$`condicion_ocupacional_corr_No activity`,
                   CONS_SEP_match2$`condicion_ocupacional_corr_Not seeking for work`,
                   CONS_SEP_match2$condicion_ocupacional_corr_Unemployed,
                   CONS_SEP_match2$`freq_cons_sus_prin_1 day a week or more`,
                   CONS_SEP_match2$`freq_cons_sus_prin_2 to 3 days a week`,
                   CONS_SEP_match2$`freq_cons_sus_prin_4 to 6 days a week`,
                   CONS_SEP_match2$freq_cons_sus_prin_Daily,
                   CONS_SEP_match2$`freq_cons_sus_prin_Did not use`,
                   CONS_SEP_match2$`freq_cons_sus_prin_Less than 1 day a week`,
                   CONS_SEP_match2$`origen_ingreso_mod_Assisted Referral`,
                   CONS_SEP_match2$`origen_ingreso_mod_Health Sector`,
                   CONS_SEP_match2$`origen_ingreso_mod_Justice Sector`,
                   CONS_SEP_match2$origen_ingreso_mod_Other,
                   CONS_SEP_match2$origen_ingreso_mod_Spontaneous,
                   CONS_SEP_match2$`dg_cie_10_rec_Diagnosis unknown (under study)`,
                   CONS_SEP_match2$`dg_cie_10_rec_With psychiatric comorbidity`,
                   CONS_SEP_match2$`dg_cie_10_rec_Without psychiatric comorbidity`,
                   CONS_SEP_match2$`nombre_region_Antofagasta (02)`,
                   CONS_SEP_match2$`nombre_region_AraucanĆ­a (09)`,
                   CONS_SEP_match2$`nombre_region_Arica (15)`,
                   CONS_SEP_match2$`nombre_region_Atacama (03)`,
                   CONS_SEP_match2$`nombre_region_AysƩn (11)`,
                   CONS_SEP_match2$`nombre_region_BiobĆ­o (08)`,
                   CONS_SEP_match2$`nombre_region_Coquimbo (04)`,
                   CONS_SEP_match2$`nombre_region_Los Lagos (10)`,
                   CONS_SEP_match2$`nombre_region_Magallanes (12)`,
                   CONS_SEP_match2$`nombre_region_Maule (07)`,
                   CONS_SEP_match2$`nombre_region_Metropolitana (13)`,
                   CONS_SEP_match2$`nombre_region_Ƒuble (16)`,
                   CONS_SEP_match2$`nombre_region_O'Higgins (06)`,
                   CONS_SEP_match2$`nombre_region_TarapacĆ” (01)`,
                   CONS_SEP_match2$`nombre_region_ValparaĆ­so (05)`,
                   CONS_SEP_match2$tipo_centro_pub_FALSE,
                   CONS_SEP_match2$tipo_centro_pub_TRUE,
                   CONS_SEP_match2$prev_treat_0,
                   CONS_SEP_match2$prev_treat_1,
                   CONS_SEP_match2$`prev_treat_2 or more`,
                   CONS_SEP_match2$sexo_2_Men,
                   CONS_SEP_match2$sexo_2_Women)
mom_tols3 = absstddif(mom_covs, t_ind, .03)
mom3 = list(covs = mom_covs3, tols = mom_tols3, targets = NULL)

# Mean balance
covs3 = cbind(CONS_SEP_match2$edad_al_ing,CONS_SEP_match2$fech_ing_num,CONS_SEP_match2$edad_ini_cons)
meantab(covs3, t_ind)
##      Mis Min Max Mean T Mean C Std Dif P-val
## [1,]   0   1  20  10.50  10.50       0     1
## [2,]   0   1  20  10.50  10.50       0     1
## [3,]   0   1  20  11.33  11.33       0     1
# Fine balance
#is a matrix where each column is a nominal covariate for fine balance
fine_covs3 = cbind(CONS_SEP_match2$edad_ini_cons, 
                   CONS_SEP_match2$edad_al_ing, 
                   CONS_SEP_match2$fech_ing_num, 
                   CONS_SEP_match2$sus_ini_mod_mvv_Alcohol, 
                   CONS_SEP_match2$`sus_ini_mod_mvv_Cocaine hydrochloride`, 
                   CONS_SEP_match2$`sus_ini_mod_mvv_Cocaine paste`, 
                   CONS_SEP_match2$`sus_ini_mod_mvv_Marijuana`, 
                   CONS_SEP_match2$sus_ini_mod_mvv_Other, 
                   CONS_SEP_match2$`estado_conyugal_2_Married/Shared living arrangements`, 
                   CONS_SEP_match2$`estado_conyugal_2_Separated/Divorced`,
                   CONS_SEP_match2$estado_conyugal_2_Single, 
                   CONS_SEP_match2$estado_conyugal_2_Widower,
                   CONS_SEP_match2$`escolaridad_rec_1-More than high school`,
                   CONS_SEP_match2$`escolaridad_rec_2-Completed high school or less`,
                   CONS_SEP_match2$`escolaridad_rec_3-Completed primary school or less`,
                   CONS_SEP_match2$condicion_ocupacional_corr_Inactive,
                   CONS_SEP_match2$`condicion_ocupacional_corr_Looking for a job for the first time`,
                   CONS_SEP_match2$`condicion_ocupacional_corr_No activity`,
                   CONS_SEP_match2$`condicion_ocupacional_corr_Not seeking for work`,
                   CONS_SEP_match2$condicion_ocupacional_corr_Unemployed,
                   CONS_SEP_match2$`freq_cons_sus_prin_1 day a week or more`,
                   CONS_SEP_match2$`freq_cons_sus_prin_2 to 3 days a week`,
                   CONS_SEP_match2$`freq_cons_sus_prin_4 to 6 days a week`,
                   CONS_SEP_match2$freq_cons_sus_prin_Daily,
                   CONS_SEP_match2$`freq_cons_sus_prin_Did not use`,
                   CONS_SEP_match2$`freq_cons_sus_prin_Less than 1 day a week`,
                   CONS_SEP_match2$`origen_ingreso_mod_Assisted Referral`,
                   CONS_SEP_match2$`origen_ingreso_mod_Health Sector`,
                   CONS_SEP_match2$`origen_ingreso_mod_Justice Sector`,
                   CONS_SEP_match2$origen_ingreso_mod_Other,
                   CONS_SEP_match2$origen_ingreso_mod_Spontaneous,
                   CONS_SEP_match2$`dg_cie_10_rec_Diagnosis unknown (under study)`,
                   CONS_SEP_match2$`dg_cie_10_rec_With psychiatric comorbidity`,
                   CONS_SEP_match2$`dg_cie_10_rec_Without psychiatric comorbidity`,
                   CONS_SEP_match2$`nombre_region_Antofagasta (02)`,
                   CONS_SEP_match2$`nombre_region_AraucanĆ­a (09)`,
                   CONS_SEP_match2$`nombre_region_Arica (15)`,
                   CONS_SEP_match2$`nombre_region_Atacama (03)`,
                   CONS_SEP_match2$`nombre_region_AysƩn (11)`,
                   CONS_SEP_match2$`nombre_region_BiobĆ­o (08)`,
                   CONS_SEP_match2$`nombre_region_Coquimbo (04)`,
                   CONS_SEP_match2$`nombre_region_Los Lagos (10)`,
                   CONS_SEP_match2$`nombre_region_Magallanes (12)`,
                   CONS_SEP_match2$`nombre_region_Maule (07)`,
                   CONS_SEP_match2$`nombre_region_Metropolitana (13)`,
                   CONS_SEP_match2$`nombre_region_Ƒuble (16)`,
                   CONS_SEP_match2$`nombre_region_O'Higgins (06)`,
                   CONS_SEP_match2$`nombre_region_TarapacĆ” (01)`,
                   CONS_SEP_match2$`nombre_region_ValparaĆ­so (05)`,
                   CONS_SEP_match2$tipo_centro_pub_FALSE,
                   CONS_SEP_match2$tipo_centro_pub_TRUE,
                   CONS_SEP_match2$prev_treat_0,
                   CONS_SEP_match2$prev_treat_1,
                   CONS_SEP_match2$`prev_treat_2 or more`,
                   CONS_SEP_match2$sexo_2_Men,
                   CONS_SEP_match2$sexo_2_Women)
fine3 = list(covs = fine_covs3)

#MATCH
start.time3 <- Sys.time()
set.seed(2125)
out3= cardmatch(t_ind, #ES NECESARIO QUE LOS TRATAMIENTOS ESTEN ORDENADOS Y LOS OTROS VECTORES TAMBIƋN 
                #mom= mom3,
                fine = fine3,# ya los definĆ­ list(covs = mom_covs, tols = mom_tols, targets = mom_targets), 
          solver = solver3)
##   Building the matching problem... 
##   Gurobi optimizer is open... 
##   Finding the optimal matches... 
## Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (win64)
## Thread count: 6 physical cores, 12 logical processors, using up to 12 threads
## Optimize a model with 156 rows, 55868 columns and 3016872 nonzeros
## Model fingerprint: 0xc40593d3
## Variable types: 0 continuous, 55868 integer (55868 binary)
## Coefficient statistics:
##   Matrix range     [1e+00, 1e+00]
##   Objective range  [1e+00, 1e+00]
##   Bounds range     [0e+00, 0e+00]
##   RHS range        [0e+00, 0e+00]
## Found heuristic solution: objective -0.0000000
## Presolve removed 40 rows and 118 columns (presolve time = 5s) ...
## Presolve removed 40 rows and 118 columns
## Presolve time: 5.50s
## Presolved: 116 rows, 55750 columns, 1205863 nonzeros
## Variable types: 0 continuous, 55750 integer (55632 binary)
## 
## Root simplex log...
## 
## Iteration    Objective       Primal Inf.    Dual Inf.      Time
##        0    7.7250000e+03   1.651330e+05   0.000000e+00      6s
##      886    7.7250000e+03   0.000000e+00   0.000000e+00      7s
## 
## Root relaxation: objective 7.725000e+03, 886 iterations, 1.35 seconds
## Total elapsed time = 10.41s
## 
##     Nodes    |    Current Node    |     Objective Bounds      |     Work
##  Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time
## 
##      0     0 7725.00000    0   75   -0.00000 7725.00000      -     -   10s
## H    0     0                    7725.0000000 7725.00000  0.00%     -   11s
##      0     0 7725.00000    0   75 7725.00000 7725.00000  0.00%     -   11s
## 
## Explored 1 nodes (12891 simplex iterations) in 11.11 seconds
## Thread count was 12 (of 12 available processors)
## 
## Solution count 2: 7725 -0 
## 
## Optimal solution found (tolerance 1.00e-04)
## Best objective 7.725000000000e+03, best bound 7.725000000000e+03, gap 0.0000%
##   Optimal matches found
end.time3 <- Sys.time()
time.taken3 <- end.time3 - start.time3

t_id_3 = out2$t_id  
c_id_3 = out2$c_id  

paste0("No. of treatments: ",table(table(t_id_3)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_3))%>% formatC(big.mark = ","))
## [1] "No. of treatments: 1,261; No. of controls: 14,189"
d_match3 = CONS_SEP_match2[c(t_id_3, c_id_3), ]
d_match3_no_duplicates = CONS_SEP_match2[which(CONS_SEP_match2$row %in% c(t_id_3, c_id_3)), ]


Explore Results of the Matching (3)


vars_ecdf<- c('edad_al_ing', 'edad_ini_cons', 'fech_ing_num')
headings <- c("Age at Admission", "Age of Onset of Drug Use", "Date of Admission")
for (i in 1:length(headings)) {
  cat("### ",headings[i],"\n")
  f<-vars_ecdf[i]
  df<-as.data.frame(CONS_SEP_match2)[,f]
  ecdfplot(df, t_id_3, c_id_3, main_title = "", legend_position = "right")
  cat('\n\n')
}

Age at Admission

Figure 11. Empirical Cumulativ e Distribution Functions on the Matched Sample

Figure 11. Empirical Cumulativ e Distribution Functions on the Matched Sample

Age of Onset of Drug Use

Figure 11. Empirical Cumulativ e Distribution Functions on the Matched Sample

Figure 11. Empirical Cumulativ e Distribution Functions on the Matched Sample

Date of Admission

Figure 11. Empirical Cumulativ e Distribution Functions on the Matched Sample

Figure 11. Empirical Cumulativ e Distribution Functions on the Matched Sample

cat("### ","Love plot","\n")

Love plot

X_mat<-subset(CONS_SEP_match2, select = -c(row, hash_key, evaluacindelprocesoteraputico_min, abandono_temprano_rec))


vline = 0.1
loveplot(X_mat=X_mat, t_id=t_id_3, c_id=c_id_3, v_line=vline, legend_position = "topright")
Figure 12. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

Figure 12. Love plot of the Matched Sample in Covariates v/s Unmatched Sample


options(knitr.kable.NA = '')

covs0_unmatch <- subset(CONS_SEP_match2, select = -c(row, hash_key, evaluacindelprocesoteraputico_min, abandono_temprano_rec))#subset=,
covs0_match <- subset(d_match3_no_duplicates, select = -c(row, hash_key, evaluacindelprocesoteraputico_min, abandono_temprano_rec))#subset=,

#_#_#_#_#_#_generar pareamientos prepost#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
library(cobalt)
bal3_nomatch<-bal.tab(covs0_unmatch, treat = CONS_SEP_match2$tipo_de_plan_res,
         thresholds = c(m = .1, v = 2),
         binary = "std", 
         continuous = "std",
         stats = c("mean.diffs", "variance.ratios","ks.statistics"))

bal3_nomatch$Balance[,2]<-round(bal3_nomatch$Balance[,2],2)
bal3_nomatch$Balance[,4]<-round(bal3_nomatch$Balance[,4],2)
bal3_nomatch$Balance[,6]<-round(bal3_nomatch$Balance[,6],2)


bal3_match<-bal.tab(covs0_match, treat = d_match3_no_duplicates$tipo_de_plan_res,
         thresholds = c(m = .1, v = 2),
         binary = "std", 
         continuous = "std",
         stats = c("mean.diffs", "variance.ratios","ks.statistics"))

bal3_match$Balance[,2]<-round(bal3_match$Balance[,2],2)
bal3_match$Balance[,4]<-round(bal3_match$Balance[,4],2)
bal3_match$Balance[,6]<-round(bal3_match$Balance[,6],2)

var_names<-c( "Age of Onset of Drug Use", "Age at Admission", "Date of Admission", "Starting Substance- Alcohol", "Starting Substance- Cocaine hydrochloride","Starting Substance- Cocaine paste", "Starting Substance- Marijuana","Starting Substance- Other", "Marital Status- Married/Shared liv. arr.","Marital Status- Separated/Divorced","Marital Status- Single","Marital Status- Widower","Ed Att- More than HS", "Ed Att- HS or less","Ed Att- PS or less","Occ Status- Employed", "Occ Status- Inactive","Occ Status- Looking 1st job","Occ Status- No activity", "Occ Status- Not seeking work","Occ Status- Unemployed","Freq Drug Cons- 1d/wk or more","Freq Drug Cons- 2-3d/wk","Freq Drug Cons- 4-6d/wk","Freq Drug Cons- Daily","Freq Drug Cons- Did not use","Freq Drug Cons- Less 1d/wk","Motive Admission- Assisted Referral", "Motive Admission- Health Sector", "Motive Admission- Justice Sector","Motive Admission- Other","Motive Admission- Spontaneous", "ICD-10 Comorbidity- Dg. Unknown/under study", "ICD-10 Comorbidity- W/Psych Comorbidity", "ICD-10 Comorbidity- Wo/Psych Comorbidity", "Region- Antofagasta(02)", "Region- AraucanĆ­a(09)","Region- Arica(15)","Region- Atacama(03)","Region- AysĆ©n(11)","Region- BiobĆ­o(08)","Region- Coquimbo(04)","Region- Los Lagos(10)","Region- Los RĆ­os(14)","Region- Magallanes(12)","Region- Maule(07)","Region- Metropolitana(13)","Region- Ƒuble(16)","Region- O'Higgins(06)","Region- TarapacĆ”(01)","Region- ValparaĆ­so(05)","Previous Treats.- 0","Previous Treats.- 1","Previous Treats.- 2 or more","Sex- Men")

pre_matched_matched<-cbind.data.frame(var_names,
                 data.table::data.table(bal3_nomatch$Balance[,1:6],keep.rownames = T),
                 data.table::data.table(bal3_match$Balance[,2:6],keep.rownames = F)
                                        ) %>% 
  janitor::clean_names() %>% 
  dplyr::arrange(desc(abs(diff_un)))

pre_matched_matched%>% 
    knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 5. Covariate Balance in the Variables of Interest"),
               col.names = c("Variables","Code","Nature of Variables", "SMDs","Threshold","Variance Ratios","Threshold","KS","SMDs","Threshold","Variance Ratios","Threshold","KS"),
               align =rep('c', 101)) %>%
    add_header_above(c(" "," "," ","Unadjusted" = 5, "Adjusted" = 5)) %>% 
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 9) %>%
  kableExtra::add_footnote( c(paste("Note. ")), 
                            notation = "none") %>%
  kableExtra::scroll_box(width = "100%", height = "375px")
Table 5. Covariate Balance in the Variables of Interest
Unadjusted
Adjusted
Variables Code Nature of Variables SMDs Threshold Variance Ratios Threshold KS SMDs Threshold Variance Ratios Threshold KS
Occ Status- Employed condicion_ocupacional_corr_Employed Binary -0.99 Not Balanced, >0.1 0.42 -0.89 Not Balanced, >0.1 0.38
Freq Drug Cons- Daily freq_cons_sus_prin_Daily Binary 0.72 Not Balanced, >0.1 0.34 0.71 Not Balanced, >0.1 0.33
Occ Status- Unemployed condicion_ocupacional_corr_Unemployed Binary 0.57 Not Balanced, >0.1 0.27 0.67 Not Balanced, >0.1 0.32
Freq Drug Cons- 2-3d/wk freq_cons_sus_prin_2 to 3 days a week Binary -0.52 Not Balanced, >0.1 0.20 -0.58 Not Balanced, >0.1 0.22
Motive Admission- Assisted Referral origen_ingreso_mod_Assisted Referral Binary 0.48 Not Balanced, >0.1 0.17 0.35 Not Balanced, >0.1 0.13
Occ Status- No activity condicion_ocupacional_corr_No activity Binary 0.41 Not Balanced, >0.1 0.12 0.26 Not Balanced, >0.1 0.05
ICD-10 Comorbidity- Wo/Psych Comorbidity dg_cie_10_rec_Without psychiatric comorbidity Binary -0.38 Not Balanced, >0.1 0.17 -0.18 Not Balanced, >0.1 0.09
Starting Substance- Alcohol sus_ini_mod_mvv_Alcohol Binary -0.36 Not Balanced, >0.1 0.17 -0.19 Not Balanced, >0.1 0.09
Previous Treats.- 0 prev_treat_0 Binary -0.33 Not Balanced, >0.1 0.15 -0.25 Not Balanced, >0.1 0.08
Marital Status- Married/Shared liv. arr. estado_conyugal_2_Married/Shared living arrangements Binary -0.29 Not Balanced, >0.1 0.13 -0.25 Not Balanced, >0.1 0.11
Marital Status- Single estado_conyugal_2_Single Binary 0.29 Not Balanced, >0.1 0.14 0.23 Not Balanced, >0.1 0.11
Motive Admission- Spontaneous origen_ingreso_mod_Spontaneous Binary -0.27 Not Balanced, >0.1 0.13 -0.20 Not Balanced, >0.1 0.10
Sex- Men sexo_2_Men Binary -0.26 Not Balanced, >0.1 0.12 -0.17 Not Balanced, >0.1 0.08
Region- Antofagasta(02) nombre_region_Antofagasta (02) Binary 0.24 Not Balanced, >0.1 0.05 0.13 Not Balanced, >0.1 0.03
Region- Arica(15) nombre_region_Arica (15) Binary 0.24 Not Balanced, >0.1 0.05 0.25 Not Balanced, >0.1 0.05
Previous Treats.- 2 or more prev_treat_2 or more Binary 0.24 Not Balanced, >0.1 0.06 0.11 Not Balanced, >0.1 0.02
Freq Drug Cons- 1d/wk or more freq_cons_sus_prin_1 day a week or more Binary -0.23 Not Balanced, >0.1 0.05 -0.24 Not Balanced, >0.1 0.05
ICD-10 Comorbidity- Dg. Unknown/under study dg_cie_10_rec_Diagnosis unknown (under study) Binary 0.23 Not Balanced, >0.1 0.10 0.04 Balanced, <0.1 0.02
Age at Admission edad_al_ing Contin. -0.21 Not Balanced, >0.1 0.99 Balanced, <2 0.08 -0.14 Not Balanced, >0.1 1.08 Balanced, <2 0.08
Date of Admission fech_ing_num Contin. -0.21 Not Balanced, >0.1 0.97 Balanced, <2 0.10 -0.03 Balanced, <0.1 0.93 Balanced, <2 0.02
Starting Substance- Cocaine paste sus_ini_mod_mvv_Cocaine paste Binary 0.21 Not Balanced, >0.1 0.07 0.08 Balanced, <0.1 0.04
Starting Substance- Marijuana sus_ini_mod_mvv_Marijuana Binary 0.21 Not Balanced, >0.1 0.10 0.17 Not Balanced, >0.1 0.07
Region- TarapacĆ”(01) nombre_region_TarapacĆ” (01) Binary 0.21 Not Balanced, >0.1 0.04 0.23 Not Balanced, >0.1 0.04
Previous Treats.- 1 prev_treat_1 Binary 0.21 Not Balanced, >0.1 0.08 0.22 Not Balanced, >0.1 0.06
Freq Drug Cons- Less 1d/wk freq_cons_sus_prin_Less than 1 day a week Binary -0.19 Not Balanced, >0.1 0.03 -0.22 Not Balanced, >0.1 0.03
Region- Metropolitana(13) nombre_region_Metropolitana (13) Binary -0.19 Not Balanced, >0.1 0.09 -0.15 Not Balanced, >0.1 0.07
Occ Status- Not seeking work condicion_ocupacional_corr_Not seeking for work Binary 0.16 Not Balanced, >0.1 0.02 0.12 Not Balanced, >0.1 0.02
Age of Onset of Drug Use edad_ini_cons Contin. -0.13 Not Balanced, >0.1 1.04 Balanced, <2 0.06 -0.08 Balanced, <0.1 1.01 Balanced, <2 0.05
Freq Drug Cons- 4-6d/wk freq_cons_sus_prin_4 to 6 days a week Binary -0.13 Not Balanced, >0.1 0.05 -0.03 Balanced, <0.1 0.01
ICD-10 Comorbidity- W/Psych Comorbidity dg_cie_10_rec_With psychiatric comorbidity Binary 0.13 Not Balanced, >0.1 0.06 0.13 Not Balanced, >0.1 0.07
Freq Drug Cons- Did not use freq_cons_sus_prin_Did not use Binary -0.11 Not Balanced, >0.1 0.01 -0.15 Not Balanced, >0.1 0.01
Region- AraucanĆ­a(09) nombre_region_AraucanĆ­a (09) Binary -0.11 Not Balanced, >0.1 0.01 -0.10 Balanced, <0.1 0.01
Region- Los Lagos(10) nombre_region_Los Lagos (10) Binary -0.11 Not Balanced, >0.1 0.02 -0.13 Not Balanced, >0.1 0.01
Region- Coquimbo(04) nombre_region_Coquimbo (04) Binary -0.10 Not Balanced, >0.1 0.02 -0.12 Not Balanced, >0.1 0.02
Region- Ƒuble(16) nombre_region_Ƒuble (16) Binary -0.10 Not Balanced, >0.1 0.01 -0.14 Not Balanced, >0.1 0.01
Ed Att- PS or less escolaridad_rec_3-Completed primary school or less Binary 0.09 Balanced, <0.1 0.04 0.06 Balanced, <0.1 0.03
Region- ValparaĆ­so(05) nombre_region_ValparaĆ­so (05) Binary 0.09 Balanced, <0.1 0.02 0.12 Not Balanced, >0.1 0.04
Ed Att- More than HS escolaridad_rec_1-More than high school Binary -0.08 Balanced, <0.1 0.03 -0.09 Balanced, <0.1 0.03
Motive Admission- Justice Sector origen_ingreso_mod_Justice Sector Binary -0.08 Balanced, <0.1 0.02 0.03 Balanced, <0.1 0.01
Region- BiobĆ­o(08) nombre_region_BiobĆ­o (08) Binary -0.07 Balanced, <0.1 0.02 -0.03 Balanced, <0.1 0.01
Region- AysƩn(11) nombre_region_AysƩn (11) Binary -0.06 Balanced, <0.1 0.00 -0.04 Balanced, <0.1 0.00
Region- Magallanes(12) nombre_region_Magallanes (12) Binary -0.06 Balanced, <0.1 0.00 -0.13 Not Balanced, >0.1 0.01
Motive Admission- Health Sector origen_ingreso_mod_Health Sector Binary -0.05 Balanced, <0.1 0.02 -0.06 Balanced, <0.1 0.03
Marital Status- Separated/Divorced estado_conyugal_2_Separated/Divorced Binary -0.04 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Ed Att- HS or less escolaridad_rec_2-Completed high school or less Binary -0.03 Balanced, <0.1 0.02 0.01 Balanced, <0.1 0.00
Occ Status- Looking 1st job condicion_ocupacional_corr_Looking for a job for the first time Binary -0.03 Balanced, <0.1 0.00 -0.04 Balanced, <0.1 0.00
Motive Admission- Other origen_ingreso_mod_Other Binary 0.03 Balanced, <0.1 0.01 -0.05 Balanced, <0.1 0.01
Starting Substance- Other sus_ini_mod_mvv_Other Binary 0.02 Balanced, <0.1 0.00 -0.06 Balanced, <0.1 0.01
Occ Status- Inactive condicion_ocupacional_corr_Inactive Binary 0.02 Balanced, <0.1 0.00 0.01 Balanced, <0.1 0.00
Region- Atacama(03) nombre_region_Atacama (03) Binary -0.02 Balanced, <0.1 0.00 -0.01 Balanced, <0.1 0.00
Region- Los RĆ­os(14) nombre_region_Los RĆ­os (14) Binary 0.02 Balanced, <0.1 0.00 -0.01 Balanced, <0.1 0.00
Marital Status- Widower estado_conyugal_2_Widower Binary -0.01 Balanced, <0.1 0.00 0.01 Balanced, <0.1 0.00
Region- Maule(07) nombre_region_Maule (07) Binary 0.01 Balanced, <0.1 0.00 0.03 Balanced, <0.1 0.00
Region- O’Higgins(06) nombre_region_O’Higgins (06) Binary 0.01 Balanced, <0.1 0.00 -0.05 Balanced, <0.1 0.01
Starting Substance- Cocaine hydrochloride sus_ini_mod_mvv_Cocaine hydrochloride Binary 0.00 Balanced, <0.1 0.00 -0.09 Balanced, <0.1 0.02
Note.
#############################################
# Standardized differences before matching
#############################################

smd_internal<-function(var,db){
  #db<-as.data.frame(db)
  smd_vec<-round(abs(mean(unlist(get(db)[which(get(db)$tipo_de_plan_res==1),..var]),na.rm=F)- mean(unlist(get(db)[which(get(db)$tipo_de_plan_res==0),..var]),na.rm=F))/
          sqrt(((sd(unlist(get(db)[which(get(db)$tipo_de_plan_res==1),..var]),na.rm=F))^2 + (sd(unlist(get(db)[which(get(db)$tipo_de_plan_res==0),..var]),na.rm=F))^2 )/2),2)
#return(assign(paste0(var,"_smd"),smd_vec,envir=.GlobalEnv))
  return(print(smd_vec))
}

smd_df<-data.frame()
for (i in 1:length(names(covs0_unmatch))){
  smd_df<-rbind(smd_df,cbind.data.frame(names(covs0_unmatch)[i],
                             smd_internal(names(covs0_unmatch)[i],"covs0_unmatch")))
}
smd_df<-
smd_df %>% 
  dplyr::rename("vars"= !!names(.[1]),"smd"= !!names(.[2])) %>% 
  dplyr::filter(vars!="tipo_de_plan_res_TRUE") %>% 
  dplyr::arrange(desc(smd))
##################################################
# Standardized differences after matching
##################################################

smd_df2<-data.frame()
for (i in 1:length(names(covs0_match))){
  smd_df2<-rbind(smd_df2,cbind.data.frame(names(covs0_match)[i],
                             smd_internal(names(covs0_match)[i],"covs0_match")))
}
smd_df2<-
  smd_df2 %>% 
  dplyr::rename("vars"= !!names(.[1]),"smd"= !!names(.[2])) %>% 
  dplyr::filter(vars!="tipo_de_plan_res_TRUE")

smd_df_prev_match_after_match<-
  smd_df %>% 
  dplyr::left_join(smd_df2,by="vars") %>% 
  dplyr::rename("Before\nMatching"= !!names(.[2]),"After\nMatching"= !!names(.[3])) %>% 
  melt()
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Data set with stand. diff. before post matchig

# Generate plot
loveplot_bal3 = ggplot(data = smd_df_prev_match_after_match, mapping = aes(x = reorder(vars, value), y = value,
                                                    group = variable, color = variable)) +
  geom_hline(yintercept = 0.1, color = "black", size = 0.2, linetype="dashed") +
  coord_flip() +
  theme_bw() + 
  theme(legend.key = element_blank()) +
  labs(y = "Average absolute standardized differences in means",x = "",color="",shape="") +
  scale_y_continuous(limits = c(0,1), breaks = seq(0,1,by=.1)) +
  scale_color_manual(values=c("gray20","black")) + 
  geom_point(aes(shape=variable),size=4) +
  scale_shape_manual(values=c(8,16)) + 
  theme(text = element_text(size=7))

loveplot_bal3
Figure 13. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

Figure 13. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

Inference

total_sample<-CONS_C1_df_dup_SEP_2020_match_not_miss2 %>% 
  dplyr::filter(row %in% unlist(CONS_SEP_match$row))
matched_sample1<-CONS_C1_df_dup_SEP_2020_match_not_miss2 %>%  dplyr::filter(row %in% c(t_id_1, c_id_1))
matched_sample2<-CONS_C1_df_dup_SEP_2020_match_not_miss2[which(CONS_C1_df_dup_SEP_2020_match_not_miss2$row %in% c(t_id_2, c_id_2)), ]
matched_sample3<-CONS_C1_df_dup_SEP_2020_match_not_miss2[which(CONS_C1_df_dup_SEP_2020_match_not_miss2$row %in% c(t_id_3, c_id_3)), ]
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

total_sample$abandono_temprano_rec<-car::recode(as.character(total_sample$abandono_temprano_rec), "FALSE='0.Late Widthdrawal';TRUE='1.Early Widthdrawal'")
total_sample$tipo_de_plan_res<-car::recode(as.character(total_sample$tipo_de_plan_res), "TRUE='1.Residential Plan'; FALSE='0.Outpatient Plan'")
matched_sample1$abandono_temprano_rec<-car::recode(as.character(matched_sample1$abandono_temprano_rec), "FALSE='0.Late Widthdrawal';TRUE='1.Early Widthdrawal'")
matched_sample1$tipo_de_plan_res<-car::recode(as.character(matched_sample1$tipo_de_plan_res), "TRUE='1.Residential Plan'; FALSE='0.Outpatient Plan'")
matched_sample2$abandono_temprano_rec<-car::recode(as.character(matched_sample2$abandono_temprano_rec), "FALSE='0.Late Widthdrawal';TRUE='1.Early Widthdrawal'")
matched_sample2$tipo_de_plan_res<-car::recode(as.character(matched_sample2$tipo_de_plan_res), "TRUE='1.Residential Plan'; FALSE='0.Outpatient Plan'")
matched_sample3$abandono_temprano_rec<-car::recode(as.character(matched_sample3$abandono_temprano_rec), "FALSE='0.Late Widthdrawal';TRUE='1.Early Widthdrawal'")
matched_sample3$tipo_de_plan_res<-car::recode(as.character(matched_sample3$tipo_de_plan_res), "TRUE='1.Residential Plan'; FALSE='0.Outpatient Plan'")

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#https://rstudio-pubs-static.s3.amazonaws.com/456237_0a3748b8fc8d4ee0bf4f1070a38d28e6.html

#temp0<-Epi::twoby2(table(rev(total_sample$tipo_de_plan_res),rev(total_sample$abandono_temprano_rec)))
temp0<-Epi::twoby2(table(total_sample$tipo_de_plan_res,total_sample$abandono_temprano_rec)[,2:1])
## 2 by 2 table analysis: 
## ------------------------------------------------------ 
## Outcome   : 1.Early Widthdrawal 
## Comparing : 0.Outpatient Plan vs. 1.Residential Plan 
## 
##                    1.Early Widthdrawal 0.Late Widthdrawal
## 0.Outpatient Plan                14002              34141
## 1.Residential Plan                3933               3792
##                       P(1.Early Widthdrawal) 95% conf. interval
## 0.Outpatient Plan                     0.2908    0.2868   0.2949
## 1.Residential Plan                    0.5091    0.4980   0.5203
## 
##                                     95% conf. interval
##              Relative Risk:  0.5713    0.5566   0.5863
##          Sample Odds Ratio:  0.3954    0.3766   0.4152
##     Probability difference: -0.2183   -0.2301  -0.2064
##  
##         Asymptotic P-value: 0.0000 
## ------------------------------------------------------
temp1<-Epi::twoby2(table(matched_sample1$tipo_de_plan_res,matched_sample1$abandono_temprano_rec)[,2:1])
## 2 by 2 table analysis: 
## ------------------------------------------------------ 
## Outcome   : 1.Early Widthdrawal 
## Comparing : 0.Outpatient Plan vs. 1.Residential Plan 
## 
##                    1.Early Widthdrawal 0.Late Widthdrawal
## 0.Outpatient Plan                 1251               3838
## 1.Residential Plan                 487                613
##                       P(1.Early Widthdrawal) 95% conf. interval
## 0.Outpatient Plan                     0.2458    0.2342   0.2578
## 1.Residential Plan                    0.4427    0.4136   0.4722
## 
##                                     95% conf. interval
##              Relative Risk:  0.5553    0.5116   0.6027
##          Sample Odds Ratio:  0.4103    0.3585   0.4696
## Conditional MLE Odds Ratio:  0.4103    0.3577   0.4708
##     Probability difference: -0.1969   -0.2286  -0.1654
## 
##              Exact P-value: 0.0000 
##         Asymptotic P-value: 0.0000 
## ------------------------------------------------------
temp2<-Epi::twoby2(table(matched_sample2$tipo_de_plan_res,matched_sample2$abandono_temprano_rec)[,2:1])
## 2 by 2 table analysis: 
## ------------------------------------------------------ 
## Outcome   : 1.Early Widthdrawal 
## Comparing : 0.Outpatient Plan vs. 1.Residential Plan 
## 
##                    1.Early Widthdrawal 0.Late Widthdrawal
## 0.Outpatient Plan                 1251               3838
## 1.Residential Plan                 487                613
##                       P(1.Early Widthdrawal) 95% conf. interval
## 0.Outpatient Plan                     0.2458    0.2342   0.2578
## 1.Residential Plan                    0.4427    0.4136   0.4722
## 
##                                     95% conf. interval
##              Relative Risk:  0.5553    0.5116   0.6027
##          Sample Odds Ratio:  0.4103    0.3585   0.4696
## Conditional MLE Odds Ratio:  0.4103    0.3577   0.4708
##     Probability difference: -0.1969   -0.2286  -0.1654
## 
##              Exact P-value: 0.0000 
##         Asymptotic P-value: 0.0000 
## ------------------------------------------------------
temp3<-Epi::twoby2(table(matched_sample3$tipo_de_plan_res,matched_sample3$abandono_temprano_rec)[,2:1])
## 2 by 2 table analysis: 
## ------------------------------------------------------ 
## Outcome   : 1.Early Widthdrawal 
## Comparing : 0.Outpatient Plan vs. 1.Residential Plan 
## 
##                    1.Early Widthdrawal 0.Late Widthdrawal
## 0.Outpatient Plan                 1251               3838
## 1.Residential Plan                 487                613
##                       P(1.Early Widthdrawal) 95% conf. interval
## 0.Outpatient Plan                     0.2458    0.2342   0.2578
## 1.Residential Plan                    0.4427    0.4136   0.4722
## 
##                                     95% conf. interval
##              Relative Risk:  0.5553    0.5116   0.6027
##          Sample Odds Ratio:  0.4103    0.3585   0.4696
## Conditional MLE Odds Ratio:  0.4103    0.3577   0.4708
##     Probability difference: -0.1969   -0.2286  -0.1654
## 
##              Exact P-value: 0.0000 
##         Asymptotic P-value: 0.0000 
## ------------------------------------------------------
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

res_unadj_oddsratio <- data.frame(out = "Total Sample",
                                    odds.ratio = round(temp0$measures[2,1],2),
                                    conf.low = round(temp0$measures[2,2],2),
                                    conf.high = round(temp0$measures[2,3],2))
res_adj_oddsratio1 <- data.frame(out = "1st Matched Sample",
                                    odds.ratio = round(temp1$measures[2,1],2),
                                    conf.low = round(temp1$measures[2,2],2),
                                    conf.high = round(temp1$measures[2,3],2))
res_adj_oddsratio2 <- data.frame(out = "2nd Matched Sample",
                                    odds.ratio = round(temp2$measures[2,1],2),
                                    conf.low = round(temp2$measures[2,2],2),
                                    conf.high = round(temp2$measures[2,3],2))
res_adj_oddsratio3 <- data.frame(out = "3rd Matched Sample",
                                    odds.ratio = round(temp3$measures[2,1],2),
                                    conf.low = round(temp3$measures[2,2],2),
                                    conf.high = round(temp3$measures[2,3],2))


mcnemar_tot<-mcnemar.test(total_sample$tipo_de_plan_res,total_sample$abandono_temprano_rec, correct = F)
mcnemar_s1<-mcnemar.test(matched_sample1$tipo_de_plan_res,matched_sample1$abandono_temprano_rec, correct = F)
mcnemar_s2<-mcnemar.test(matched_sample2$tipo_de_plan_res,matched_sample2$abandono_temprano_rec, correct = F)
mcnemar_s3<-mcnemar.test(matched_sample3$tipo_de_plan_res,matched_sample3$abandono_temprano_rec, correct = F)

rbind.data.frame(res_unadj_oddsratio,res_adj_oddsratio1, res_adj_oddsratio2, res_adj_oddsratio3) %>% 
knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 6. Odds Ratio"),
               col.names = c("Sample","OR","OR CI95% Lower", "OR CI95% Upper"),
               align =rep('c', 101)) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 9) %>%
  kableExtra::add_footnote( c(paste("Note. ")), 
                            notation = "none") %>%
  kableExtra::scroll_box(width = "100%", height = "210px")
Table 6. Odds Ratio
Sample OR OR CI95% Lower OR CI95% Upper
Total Sample 0.40 0.38 0.42
1st Matched Sample 0.41 0.36 0.47
2nd Matched Sample 0.41 0.36 0.47
3rd Matched Sample 0.41 0.36 0.47
Note.

Sensitiviy Analyses

We estimated the sensibility of the associations between the early withdrawal and the residential plans assuming different values of the sensitivity parameter from 1 to 2 (\(\Gamma\)).


tab1 <- as.matrix(table(total_sample$tipo_de_plan_res,total_sample$abandono_temprano_rec))
tab2 <- as.matrix(table(matched_sample1$tipo_de_plan_res,matched_sample1$abandono_temprano_rec))
tab3 <- as.matrix(table(matched_sample2$tipo_de_plan_res,matched_sample2$abandono_temprano_rec))
tab4 <- as.matrix(table(matched_sample3$tipo_de_plan_res,matched_sample3$abandono_temprano_rec))

#mhLS(tab2,tab1,Gamma=1)
df_sens<-data.frame(sample=c("total sample",
                             "matched sample1",
                             "matched sample2",
                             "matched sample3"),
                    tab=paste0("tab",1:4))
    mh_results<- data.frame()
  for (f in 1:dim(df_sens)[1]){
      prob_gamma<- seq(1,3,.01)  
      for (i in 1:length(prob_gamma)){
        results <- mh(get(df_sens[f,2]),Gamma=prob_gamma[i])
        mh_results<-rbind.data.frame(mh_results,
                    cbind(df_sens[f,1],prob_gamma[i],unlist(results)[1],unlist(results)[2]))
      }  
}
colnames(mh_results)<- c("sample","gamma","p-value","A")
max_gamma<-
mh_results %>% 
    dplyr::filter(`p-value`<.05) %>% 
    dplyr::summarise(max(gamma))
#  mh_results %>% 
#    dplyr::mutate("p-value"=round(as.numeric(`p-value`),4)) %>% 
#knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
#               caption = paste0("Table 7. Sensitivity Tests"),
#               col.names = c("Sample","Gamma","p-value", "A"),
#               align =rep('c', 101)) %>%
#  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 9) %>%
#  kableExtra::add_footnote( c(paste("Note. ")), 
#                            notation = "none") %>%
#  kableExtra::scroll_box(width = "100%", height = "510px")
mh_plot_results<-
  mh_results %>% 
    dplyr::mutate("p-value"=round(as.numeric(`p-value`),8)) %>%
    dplyr::mutate("gamma"=as.numeric(`gamma`)) %>%
    dplyr::mutate(sample=factor(sample, levels = c("total sample","matched sample1","matched sample2","matched sample3"))) %>% 
    ggplot(aes(x=gamma, y=`p-value`,color=sample))+
    geom_line(alpha=.3)+
    geom_hline(yintercept=.05, linetype=2, color="red")+
    sjPlot::theme_sjplot2() 
tooltip_css <- "background-color:gray;color:white;font-style:italic;padding:10px;border-radius:10px 20px 10px 20px;"
ggiraph(code = {print(mh_plot_results)}, tooltip_extra_css = tooltip_css, tooltip_opacity = .75)

Figure 14. Sensibility Parameters for the Models Specified

#interpretación resultados:
#https://www.eafit.edu.co/centros/urbam/proyectos/Documents/American%20Journal%20of%20Epidemiology.pdf


Sensitivity analysis revealed that these effects were robust, as an unobserved variable would need to increase the odds of treatment by at least 142% (\(\Gamma\) = 2.42) in order to confound the estimated treatment effects.


#table(matched_sample1$abandono_temprano_rec,matched_sample1$tipo_de_plan_res)

###
mcn.exact.p <- function(n, p, prob){
  choose(n,b)* prob^b * (1-prob)^(n-b)
}
###
b <- temp0$table[1,2]:(temp0$table[1,2]+temp0$table[2,1])
gamma <- seq(0, 2, .1)
prob <- gamma/(1+gamma)
  allresults <- list()
  for (i in 1:length(prob)){
    results <- sapply(b, mcn.exact.p, n=(temp0$table[1,2]+temp0$table[2,1]),
                      prob= prob[i])
    allresults[[i]]<-results
  }
pvals <- 2* unlist(lapply(allresults,sum))

Session Info

Sys.getenv("R_LIBS_USER")
## [1] "C:/Users/CISS Fondecyt/OneDrive/Documentos/R/win-library/4.0"
rstudioapi::getSourceEditorContext()
## Document Context: 
## - id:        '0D858838'
## - path:      'G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/SUD_CL/Matching_Process.Rmd'
## - contents:  <2682 rows>
## Document Selection:
## - [2605, 45] -- [2605, 45]: ''
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Spanish_Chile.1252  LC_CTYPE=Spanish_Chile.1252   
## [3] LC_MONETARY=Spanish_Chile.1252 LC_NUMERIC=C                  
## [5] LC_TIME=Spanish_Chile.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gdtools_0.2.2           Amelia_1.7.6            Rcpp_1.0.5             
##  [4] polycor_0.7-10          DiagrammeR_1.0.6.1.9000 gurobi_9.1-0           
##  [7] radiant.update_1.4.1    cobalt_4.2.3            sensitivityfull_1.5.6  
## [10] sensitivity2x2xk_1.01   MatchIt_3.0.2           tableone_0.12.0        
## [13] stargazer_5.2.2         reshape2_1.4.4          exactRankTests_0.8-31  
## [16] gridExtra_2.3           foreign_0.8-80          glpkAPI_1.3.2          
## [19] designmatch_0.3.1       Rglpk_0.6-4             slam_0.1-47            
## [22] MASS_7.3-51.6           survMisc_0.5.5          ggfortify_0.4.10       
## [25] rateratio.test_1.0-2    survminer_0.4.8         ggpubr_0.4.0           
## [28] epiR_1.0-15             forcats_0.5.0           purrr_0.3.4            
## [31] readr_1.3.1             tibble_3.0.3            tidyverse_1.3.0        
## [34] treemapify_2.5.3        ggiraph_0.7.0           chilemapas_0.2         
## [37] sf_0.9-3                finalfit_1.0.1          lsmeans_2.30-0         
## [40] emmeans_1.4.8           choroplethrAdmin1_1.1.1 choroplethrMaps_1.0.1  
## [43] choroplethr_3.6.3       acs_2.1.4               XML_3.99-0.3           
## [46] RColorBrewer_1.1-2      panelr_0.7.3            lme4_1.1-23            
## [49] Matrix_1.2-18           dplyr_1.0.1             data.table_1.13.0      
## [52] codebook_0.9.2          devtools_2.3.0          usethis_1.6.1          
## [55] sqldf_0.4-11            RSQLite_2.2.0           gsubfn_0.7             
## [58] proto_1.0.0             broom_0.7.0             zoo_1.8-8              
## [61] altair_4.0.1            rbokeh_0.5.1            janitor_2.0.1          
## [64] plotly_4.9.2.1          kableExtra_1.1.0        Hmisc_4.4-0            
## [67] Formula_1.2-3           survival_3.1-12         lattice_0.20-41        
## [70] ggplot2_3.3.2           stringr_1.4.0           stringi_1.4.6          
## [73] tidyr_1.1.1             knitr_1.29              matrixStats_0.56.0     
## [76] boot_1.3-25            
## 
## loaded via a namespace (and not attached):
##   [1] class_7.3-17        ps_1.3.3            rprojroot_1.3-2    
##   [4] crayon_1.3.4        V8_3.1.0            nlme_3.1-148       
##   [7] backports_1.1.7     reprex_0.3.0        ggcorrplot_0.1.3   
##  [10] rlang_0.4.7         readxl_1.3.1        performance_0.4.8  
##  [13] nloptr_1.2.2.2      callr_3.4.3         rjson_0.2.20       
##  [16] cmprsk_2.2-10       ggmap_3.0.0         bit64_0.9-7        
##  [19] glue_1.4.1          sjPlot_2.8.4        parallel_4.0.2     
##  [22] processx_3.4.3      classInt_0.4-3      tcltk_4.0.2        
##  [25] haven_2.3.1         tidyselect_1.1.0    km.ci_0.5-2        
##  [28] rio_0.5.16          sjmisc_2.8.5        chron_2.3-55       
##  [31] xtable_1.8-4        magrittr_1.5        evaluate_0.14      
##  [34] RgoogleMaps_1.4.5.3 cli_2.0.2           Epi_2.40           
##  [37] rstudioapi_0.11     sp_1.4-2            rpart_4.1-15       
##  [40] jtools_2.0.5        sjlabelled_1.1.6    RJSONIO_1.3-1.4    
##  [43] maps_3.3.0          gistr_0.5.0         xfun_0.16          
##  [46] parameters_0.8.2    pkgbuild_1.1.0      cluster_2.1.0      
##  [49] ggfittext_0.9.0     png_0.1-7           withr_2.2.0        
##  [52] bitops_1.0-6        plyr_1.8.6          cellranger_1.1.0   
##  [55] e1071_1.7-3         survey_4.0          coda_0.19-3        
##  [58] pillar_1.4.6        multcomp_1.4-13     fs_1.5.0           
##  [61] vctrs_0.3.2         ellipsis_0.3.1      generics_0.0.2     
##  [64] rgdal_1.5-8         tools_4.0.2         munsell_0.5.0      
##  [67] compiler_4.0.2      pkgload_1.1.0       abind_1.4-5        
##  [70] tigris_0.9.4        sessioninfo_1.1.1   visNetwork_2.0.9   
##  [73] jsonlite_1.7.0      WDI_2.6.0           scales_1.1.1       
##  [76] carData_3.0-4       estimability_1.3    lazyeval_0.2.2     
##  [79] car_3.0-8           latticeExtra_0.6-29 reticulate_1.16    
##  [82] effectsize_0.3.2    checkmate_2.0.0     rmarkdown_2.5      
##  [85] openxlsx_4.1.5      sandwich_2.5-1      statmod_1.4.34     
##  [88] webshot_0.5.2       pander_0.6.3        numDeriv_2016.8-1.1
##  [91] yaml_2.2.1          systemfonts_0.2.3   htmltools_0.5.0    
##  [94] memoise_1.1.0       viridisLite_0.3.0   jsonvalidate_1.1.0 
##  [97] digest_0.6.25       assertthat_0.2.1    rappdirs_0.3.1     
## [100] repr_1.1.0          bayestestR_0.7.2    BiasedUrn_1.07     
## [103] KMsurv_0.1-5        units_0.6-6         remotes_2.2.0      
## [106] blob_1.2.1          splines_4.0.2       labeling_0.3       
## [109] hms_0.5.3           rmapshaper_0.4.4    modelr_0.1.8       
## [112] colorspace_1.4-1    base64enc_0.1-3     nnet_7.3-14        
## [115] mvtnorm_1.1-1       fansi_0.4.1         R6_2.4.1           
## [118] grid_4.0.2          crul_0.9.0          lifecycle_0.2.0    
## [121] acepack_1.4.1       labelled_2.5.0      zip_2.1.1          
## [124] curl_4.3            geojsonlint_0.4.0   ggsignif_0.6.0     
## [127] pryr_0.1.4          minqa_1.2.4         testthat_2.3.2     
## [130] snakecase_0.11.0    desc_1.2.0          TH.data_1.0-10     
## [133] htmlwidgets_1.5.1   crosstalk_1.1.0.1   rvest_0.3.6        
## [136] mgcv_1.8-31         insight_0.9.0       htmlTable_2.0.1    
## [139] codetools_0.2-16    lubridate_1.7.9     prettyunits_1.1.1  
## [142] dbplyr_1.4.4        vegawidget_0.3.1    gtable_0.3.0       
## [145] DBI_1.1.0           etm_1.1             httr_1.4.2         
## [148] highr_0.8           KernSmooth_2.23-17  farver_2.0.3       
## [151] uuid_0.1-4          hexbin_1.28.1       mice_3.11.0        
## [154] xml2_1.3.2          ggeffects_0.15.1    bit_1.1-15.2       
## [157] sjstats_0.18.0      jpeg_0.1-8.1        pkgconfig_2.0.3    
## [160] maptools_1.0-1      rstatix_0.6.0       mitools_2.4        
## [163] httpcode_0.3.0